diff --git a/.ci/monolithic-windows.sh b/.ci/monolithic-windows.sh index 4fd88ea81c84a..91e719c52d436 100755 --- a/.ci/monolithic-windows.sh +++ b/.ci/monolithic-windows.sh @@ -44,6 +44,8 @@ pip install -q -r "${MONOREPO_ROOT}"/mlir/python/requirements.txt # see https://github.com/llvm/llvm-project/pull/82393 and # https://discourse.llvm.org/t/rfc-future-of-windows-pre-commit-ci/76840/40 # for further information. +# We limit the number of parallel compile jobs to 16 to control memory +# consumption and improve build reliability. cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \ -D LLVM_ENABLE_PROJECTS="${projects}" \ -G Ninja \ @@ -58,7 +60,9 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \ -D MLIR_ENABLE_BINDINGS_PYTHON=ON \ -D CMAKE_EXE_LINKER_FLAGS="/MANIFEST:NO" \ -D CMAKE_MODULE_LINKER_FLAGS="/MANIFEST:NO" \ - -D CMAKE_SHARED_LINKER_FLAGS="/MANIFEST:NO" + -D CMAKE_SHARED_LINKER_FLAGS="/MANIFEST:NO" \ + -D LLVM_PARALLEL_COMPILE_JOBS=16 \ + -D LLVM_PARALLEL_LINK_JOBS=4 echo "--- ninja" # Targets are not escaped as they are passed as separate arguments. diff --git a/.github/workflows/restart-preempted-libcxx-jobs.yaml b/.github/workflows/restart-preempted-libcxx-jobs.yaml new file mode 100644 index 0000000000000..71e27ff2abb9f --- /dev/null +++ b/.github/workflows/restart-preempted-libcxx-jobs.yaml @@ -0,0 +1,105 @@ +name: Restart Preempted Libc++ Workflow + +# The libc++ builders run on preemptable VMs, which can be shut down at any time. +# This workflow identifies when a workflow run was canceled due to the VM being preempted, +# and restarts the workflow run. + +# We identify a canceled workflow run by checking the annotations of the check runs in the check suite, +# which should contain the message "The runner has received a shutdown signal." + +# Note: If a job is both preempted and also contains a non-preemption failure, we do not restart the workflow. 
+ +on: + workflow_run: + workflows: [Build and Test libc\+\+] + types: + - completed + +permissions: + contents: read + +jobs: + restart: + if: github.repository_owner == 'llvm' && (github.event.workflow_run.conclusion == 'failure' || github.event.workflow_run.conclusion == 'cancelled') + name: "Restart Job" + permissions: + statuses: read + checks: read + actions: write + runs-on: ubuntu-latest + steps: + - name: "Restart Job" + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea #v7.0.1 + with: + script: | + const failure_regex = /Process completed with exit code 1./ + const preemption_regex = /The runner has received a shutdown signal/ + + console.log('Listing check runs for suite') + const check_suites = await github.rest.checks.listForSuite({ + owner: context.repo.owner, + repo: context.repo.repo, + check_suite_id: context.payload.workflow_run.check_suite_id + }) + + check_run_ids = []; + for (check_run of check_suites.data.check_runs) { + console.log('Checking check run: ' + check_run.id); + if (check_run.status != 'completed') { + console.log('Check run was not completed. Skipping.'); + continue; + } + if (check_run.conclusion != 'failure' && check_run.conclusion != 'cancelled') { + console.log('Check run had conclusion: ' + check_run.conclusion + '. 
Skipping.'); + continue; + } + check_run_ids.push(check_run.id); + } + + has_preempted_job = false; + + for (check_run_id of check_run_ids) { + console.log('Listing annotations for check run: ' + check_run_id); + + annotations = await github.rest.checks.listAnnotations({ + owner: context.repo.owner, + repo: context.repo.repo, + check_run_id: check_run_id + }) + + for (annotation of annotations.data) { + if (annotation.annotation_level != 'failure') { + continue; + } + + const preemption_match = annotation.message.match(preemption_regex); + + if (preemption_match != null) { + console.log('Found preemption message: ' + annotation.message); + has_preempted_job = true; + } + + const failure_match = annotation.message.match(failure_regex); + if (failure_match != null) { + // We only want to restart the workflow if all of the failures were due to preemption. + // We don't want to restart the workflow if there were other failures. + console.log('Choosing not to rerun workflow because we found a non-preemption failure'); + console.log('Failure message: ' + annotation.message); + return; + } + } + } + + if (!has_preempted_job) { + console.log('No preempted jobs found. 
Not restarting workflow.'); + return; + } + + console.log("Restarted workflow: " + context.payload.workflow_run.id); + await github.rest.actions.reRunWorkflowFailedJobs({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: context.payload.workflow_run.id + }) + + diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp index c3208392df156..828f13805a698 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp @@ -1414,13 +1414,21 @@ IdentifierNamingCheck::getDiagInfo(const NamingCheckId &ID, }}; } +StringRef IdentifierNamingCheck::getRealFileName(StringRef FileName) const { + auto Iter = RealFileNameCache.try_emplace(FileName); + SmallString<256U> &RealFileName = Iter.first->getValue(); + if (!Iter.second) + return RealFileName; + llvm::sys::fs::real_path(FileName, RealFileName); + return RealFileName; +} + const IdentifierNamingCheck::FileStyle & IdentifierNamingCheck::getStyleForFile(StringRef FileName) const { if (!GetConfigPerFile) return *MainFileStyle; - SmallString<128> RealFileName; - llvm::sys::fs::real_path(FileName, RealFileName); + StringRef RealFileName = getRealFileName(FileName); StringRef Parent = llvm::sys::path::parent_path(RealFileName); auto Iter = NamingStylesCache.find(Parent); if (Iter != NamingStylesCache.end()) diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h index 27c8e4bc768c4..646ec0eac8dd1 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h +++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h @@ -205,6 +205,7 @@ class IdentifierNamingCheck final : public RenamerClangTidyCheck { const NamingCheckFailure &Failure) const override; const FileStyle &getStyleForFile(StringRef FileName) const; + StringRef 
getRealFileName(StringRef FileName) const; /// Find the style kind of a field in an anonymous record. StyleKind findStyleKindForAnonField( @@ -222,6 +223,7 @@ class IdentifierNamingCheck final : public RenamerClangTidyCheck { /// Stores the style options as a vector, indexed by the specified \ref /// StyleKind, for a given directory. mutable llvm::StringMap NamingStylesCache; + mutable llvm::StringMap> RealFileNameCache; FileStyle *MainFileStyle; ClangTidyContext *Context; const bool GetConfigPerFile; diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability/identifier-length.rst b/clang-tools-extra/docs/clang-tidy/checks/readability/identifier-length.rst index 44d97f7b363bf..271970c292c8f 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/readability/identifier-length.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/readability/identifier-length.rst @@ -28,10 +28,7 @@ The following options are described below: .. code-block:: c++ - int doubler(int x) // warns that x is too short - { - return 2 * x; - } + int i = 42; // warns that 'i' is too short This check does not have any fix suggestions in the general case since variable names have semantic value. @@ -50,7 +47,10 @@ The following options are described below: .. code-block:: c++ - int i = 42; // warns that 'i' is too short + int doubler(int x) // warns that x is too short + { + return 2 * x; + } This check does not have any fix suggestions in the general case since variable names have semantic value. diff --git a/clang/docs/InternalsManual.rst b/clang/docs/InternalsManual.rst index b3e2b870ae5f9..3d21e37784b36 100644 --- a/clang/docs/InternalsManual.rst +++ b/clang/docs/InternalsManual.rst @@ -123,6 +123,44 @@ severe that error recovery won't be able to recover sensibly from them (thus spewing a ton of bogus errors). One example of this class of error are failure to ``#include`` a file. 
+Diagnostic Wording +^^^^^^^^^^^^^^^^^^ +The wording used for a diagnostic is critical because it is the only way for a +user to know how to correct their code. Use the following suggestions when +wording a diagnostic. + +* Diagnostics in Clang do not start with a capital letter and do not end with + punctuation. + + * This does not apply to proper nouns like ``Clang`` or ``OpenMP``, to + acronyms like ``GCC`` or ``ARC``, or to language standards like ``C23`` + or ``C++17``. + * A trailing question mark is allowed, e.g., ``unknown identifier %0; did + you mean %1?``. + +* Appropriately capitalize proper nouns like ``Clang``, ``OpenCL``, ``GCC``, + ``Objective-C``, etc., and language standard versions like ``C11`` or ``C++11``. +* The wording should be succinct. If necessary, use a semicolon to combine + sentence fragments instead of using complete sentences. e.g., prefer wording + like ``'%0' is deprecated; it will be removed in a future release of Clang`` + over wording like ``'%0' is deprecated. It will be removed in a future release + of Clang``. +* The wording should be actionable and avoid using standards terms or grammar + productions that a new user would not be familiar with. e.g., prefer wording + like ``missing semicolon`` over wording like ``syntax error`` (which is not + actionable) or ``expected unqualified-id`` (which uses standards terminology). +* The wording should clearly explain what is wrong with the code rather than + restating what the code does. e.g., prefer wording like ``type %0 requires a + value in the range %1 to %2`` over wording like ``%0 is invalid``. +* The wording should have enough contextual information to help the user + identify the issue in a complex expression. e.g., prefer wording like + ``both sides of the %0 binary operator are identical`` over wording like + ``identical operands to binary operator``. +* Use single quotes to denote syntactic constructs or command line arguments + named in a diagnostic message. 
e.g., prefer wording like ``'this' pointer + cannot be null in well-defined C++ code`` over wording like ``this pointer + cannot be null in well-defined C++ code``. + The Format String ^^^^^^^^^^^^^^^^^ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 182f8b5824258..bd92818f0c09d 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -541,6 +541,9 @@ Improvements to Clang's diagnostics - Clang emits a ``-Wparentheses`` warning for expressions with consecutive comparisons like ``x < y < z``. Fixes #GH20456. +- Clang no longer emits a "declared here" note for a builtin function that has no declaration in source. + Fixes #GH93369. + Improvements to Clang's time-trace ---------------------------------- @@ -629,6 +632,9 @@ Bug Fixes in This Version - ``__is_array`` and ``__is_bounded_array`` no longer return ``true`` for zero-sized arrays. Fixes (#GH54705). +- Correctly reject declarations where a statement is required in C. + Fixes #GH92775. + Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -802,6 +808,9 @@ Bug Fixes to C++ Support - Fixed a regression introduced in Clang 18 causing a static function overloading a non-static function with the same parameters not to be diagnosed. (Fixes #GH93456). - Clang now diagnoses unexpanded parameter packs in attributes. (Fixes #GH93269). +- Clang now allows ``@$``` in raw string literals. Fixes (#GH93130). +- Fix an assertion failure when checking invalid ``this`` usage in the wrong context. (Fixes #GH91536). +- Clang no longer models dependent NTTP arguments as ``TemplateParamObjectDecl`` s. Fixes (#GH84052). 
Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/AST/StmtOpenACC.h b/clang/include/clang/AST/StmtOpenACC.h index b706864798baa..04daf511f5871 100644 --- a/clang/include/clang/AST/StmtOpenACC.h +++ b/clang/include/clang/AST/StmtOpenACC.h @@ -31,6 +31,8 @@ class OpenACCConstructStmt : public Stmt { /// The location of the directive statement, from the '#' to the last token of /// the directive. SourceRange Range; + /// The location of the directive name. + SourceLocation DirectiveLoc; /// The list of clauses. This is stored here as an ArrayRef, as this is the /// most convienient place to access the list, however the list itself should @@ -39,8 +41,9 @@ class OpenACCConstructStmt : public Stmt { protected: OpenACCConstructStmt(StmtClass SC, OpenACCDirectiveKind K, - SourceLocation Start, SourceLocation End) - : Stmt(SC), Kind(K), Range(Start, End) {} + SourceLocation Start, SourceLocation DirectiveLoc, + SourceLocation End) + : Stmt(SC), Kind(K), Range(Start, End), DirectiveLoc(DirectiveLoc) {} // Used only for initialization, the leaf class can initialize this to // trailing storage. 
@@ -59,6 +62,7 @@ class OpenACCConstructStmt : public Stmt { SourceLocation getBeginLoc() const { return Range.getBegin(); } SourceLocation getEndLoc() const { return Range.getEnd(); } + SourceLocation getDirectiveLoc() const { return DirectiveLoc; } ArrayRef clauses() const { return Clauses; } child_range children() { @@ -81,9 +85,11 @@ class OpenACCAssociatedStmtConstruct : public OpenACCConstructStmt { protected: OpenACCAssociatedStmtConstruct(StmtClass SC, OpenACCDirectiveKind K, - SourceLocation Start, SourceLocation End, - Stmt *AssocStmt) - : OpenACCConstructStmt(SC, K, Start, End), AssociatedStmt(AssocStmt) {} + SourceLocation Start, + SourceLocation DirectiveLoc, + SourceLocation End, Stmt *AssocStmt) + : OpenACCConstructStmt(SC, K, Start, DirectiveLoc, End), + AssociatedStmt(AssocStmt) {} void setAssociatedStmt(Stmt *S) { AssociatedStmt = S; } Stmt *getAssociatedStmt() { return AssociatedStmt; } @@ -126,10 +132,10 @@ class OpenACCComputeConstruct final friend class ASTStmtReader; friend class ASTContext; OpenACCComputeConstruct(unsigned NumClauses) - : OpenACCAssociatedStmtConstruct(OpenACCComputeConstructClass, - OpenACCDirectiveKind::Invalid, - SourceLocation{}, SourceLocation{}, - /*AssociatedStmt=*/nullptr) { + : OpenACCAssociatedStmtConstruct( + OpenACCComputeConstructClass, OpenACCDirectiveKind::Invalid, + SourceLocation{}, SourceLocation{}, SourceLocation{}, + /*AssociatedStmt=*/nullptr) { // We cannot send the TrailingObjects storage to the base class (which holds // a reference to the data) until it is constructed, so we have to set it // separately here. 
@@ -141,11 +147,11 @@ class OpenACCComputeConstruct final } OpenACCComputeConstruct(OpenACCDirectiveKind K, SourceLocation Start, - SourceLocation End, + SourceLocation DirectiveLoc, SourceLocation End, ArrayRef Clauses, Stmt *StructuredBlock) : OpenACCAssociatedStmtConstruct(OpenACCComputeConstructClass, K, Start, - End, StructuredBlock) { + DirectiveLoc, End, StructuredBlock) { assert(isOpenACCComputeDirectiveKind(K) && "Only parallel, serial, and kernels constructs should be " "represented by this type"); @@ -169,8 +175,8 @@ class OpenACCComputeConstruct final unsigned NumClauses); static OpenACCComputeConstruct * Create(const ASTContext &C, OpenACCDirectiveKind K, SourceLocation BeginLoc, - SourceLocation EndLoc, ArrayRef Clauses, - Stmt *StructuredBlock); + SourceLocation DirectiveLoc, SourceLocation EndLoc, + ArrayRef Clauses, Stmt *StructuredBlock); Stmt *getStructuredBlock() { return getAssociatedStmt(); } const Stmt *getStructuredBlock() const { diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index e59cccccdd369..ef9df1e9d8b4a 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2025,9 +2025,12 @@ def Convergent : InheritableAttr { def NoInline : DeclOrStmtAttr { let Spellings = [CustomKeyword<"__noinline__">, GCC<"noinline">, CXX11<"clang", "noinline">, C23<"clang", "noinline">, + CXX11<"msvc", "noinline">, C23<"msvc", "noinline">, Declspec<"noinline">]; - let Accessors = [Accessor<"isClangNoInline", [CXX11<"clang", "noinline">, - C23<"clang", "noinline">]>]; + let Accessors = [Accessor<"isStmtNoInline", [CXX11<"clang", "noinline">, + C23<"clang", "noinline">, + CXX11<"msvc", "noinline">, + C23<"msvc", "noinline">]>]; let Documentation = [NoInlineDocs]; let Subjects = SubjectList<[Function, Stmt], WarnDiag, "functions and statements">; diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def index fd8c1b480d6da..4e48ff48b60f5 
100644 --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -135,6 +135,10 @@ TARGET_BUILTIN(__builtin_wasm_min_f64x2, "V2dV2dV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_max_f64x2, "V2dV2dV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_pmin_f64x2, "V2dV2dV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_pmax_f64x2, "V2dV2dV2d", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_min_f16x8, "V8hV8hV8h", "nc", "half-precision") +TARGET_BUILTIN(__builtin_wasm_max_f16x8, "V8hV8hV8h", "nc", "half-precision") +TARGET_BUILTIN(__builtin_wasm_pmin_f16x8, "V8hV8hV8h", "nc", "half-precision") +TARGET_BUILTIN(__builtin_wasm_pmax_f16x8, "V8hV8hV8h", "nc", "half-precision") TARGET_BUILTIN(__builtin_wasm_ceil_f32x4, "V4fV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_floor_f32x4, "V4fV4f", "nc", "simd128") diff --git a/clang/include/clang/Basic/CharInfo.h b/clang/include/clang/Basic/CharInfo.h index d807955311828..d71857e8e5dcc 100644 --- a/clang/include/clang/Basic/CharInfo.h +++ b/clang/include/clang/Basic/CharInfo.h @@ -28,8 +28,7 @@ namespace charinfo { CHAR_LOWER = 0x0040, // a-z CHAR_UNDER = 0x0080, // _ CHAR_PERIOD = 0x0100, // . - CHAR_RAWDEL = 0x0200, // {}[]#<>%:;?*+-/^&|~!=,"' - CHAR_PUNCT = 0x0400 // `$@() + CHAR_PUNCT = 0x0200, // {}[]#<>%:;?*+-/^&|~!=,"'`$@() }; enum { @@ -152,7 +151,7 @@ LLVM_READONLY inline bool isHexDigit(unsigned char c) { /// Note that '_' is both a punctuation character and an identifier character! LLVM_READONLY inline bool isPunctuation(unsigned char c) { using namespace charinfo; - return (InfoTable[c] & (CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL|CHAR_PUNCT)) != 0; + return (InfoTable[c] & (CHAR_UNDER | CHAR_PERIOD | CHAR_PUNCT)) != 0; } /// Return true if this character is an ASCII printable character; that is, a @@ -160,8 +159,8 @@ LLVM_READONLY inline bool isPunctuation(unsigned char c) { /// terminal. 
LLVM_READONLY inline bool isPrintable(unsigned char c) { using namespace charinfo; - return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD|CHAR_PUNCT| - CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL|CHAR_SPACE)) != 0; + return (InfoTable[c] & (CHAR_UPPER | CHAR_LOWER | CHAR_PERIOD | CHAR_PUNCT | + CHAR_DIGIT | CHAR_UNDER | CHAR_SPACE)) != 0; } /// Return true if this is the body character of a C preprocessing number, @@ -175,8 +174,9 @@ LLVM_READONLY inline bool isPreprocessingNumberBody(unsigned char c) { /// Return true if this is the body character of a C++ raw string delimiter. LLVM_READONLY inline bool isRawStringDelimBody(unsigned char c) { using namespace charinfo; - return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD| - CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL)) != 0; + return (InfoTable[c] & (CHAR_UPPER | CHAR_LOWER | CHAR_PERIOD | CHAR_DIGIT | + CHAR_UNDER | CHAR_PUNCT)) != 0 && + c != '(' && c != ')'; } enum class EscapeChar { diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 5a4551a96ca4e..25fbfe83fa2bc 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -111,6 +111,14 @@ def warn_cxx98_compat_raw_string_literal : Warning< "raw string literals are incompatible with C++98">, InGroup, DefaultIgnore; +def warn_cxx26_compat_raw_string_literal_character_set : Warning< + " '%0' in a raw string literal delimiter is incompatible " + "with standards before C++2c">, + InGroup, DefaultIgnore; +def ext_cxx26_raw_string_literal_character_set : Extension< + " '%0' in a raw string literal delimiter is a C++2c extension">, + InGroup, DefaultIgnore; + def warn_multichar_character_literal : Warning< "multi-character character constant">, InGroup; def warn_four_char_character_literal : Warning< diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 80e635e4a57ec..564a58e4eb670 100644 --- 
a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -146,6 +146,25 @@ let TargetGuard = "sme" in { [IsOverloadNone, IsStreamingCompatible, IsOutZA]>; } +let TargetGuard = "sme2p1" in { + def SVZERO_ZA64_VG1x2 : SInst<"svzero_za64_vg1x2", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg1x2", + [IsOverloadNone, IsStreaming, IsInOutZA]>; + def SVZERO_ZA64_VG1x4 : SInst<"svzero_za64_vg1x4", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg1x4", + [IsOverloadNone, IsStreaming, IsInOutZA]>; + def SVZERO_ZA64_VG2x1 : SInst<"svzero_za64_vg2x1", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg2x1", + [IsOverloadNone, IsStreaming, IsInOutZA]>; + def SVZERO_ZA64_VG2x2 : SInst<"svzero_za64_vg2x2", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg2x2", + [IsOverloadNone, IsStreaming, IsInOutZA]>; + def SVZERO_ZA64_VG2x4 : SInst<"svzero_za64_vg2x4", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg2x4", + [IsOverloadNone, IsStreaming, IsInOutZA]>; + def SVZERO_ZA64_VG4x1 : SInst<"svzero_za64_vg4x1", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg4x1", + [IsOverloadNone, IsStreaming, IsInOutZA]>; + def SVZERO_ZA64_VG4x2 : SInst<"svzero_za64_vg4x2", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg4x2", + [IsOverloadNone, IsStreaming, IsInOutZA]>; + def SVZERO_ZA64_VG4x4 : SInst<"svzero_za64_vg4x4", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg4x4", + [IsOverloadNone, IsStreaming, IsInOutZA]>; +} + //////////////////////////////////////////////////////////////////////////////// // SME - Counting elements in a streaming vector diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 03570f94de666..88938a981fd8a 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2151,6 +2151,11 @@ let TargetGuard = "sme2" in { def SVFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "hfd", MergeNone, "aarch64_sve_fclamp_single_x4", [IsStreaming], []>; } +let TargetGuard = "sme2,b16b16"in { + def 
SVBFCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]", "22dd", "b", MergeNone, "aarch64_sve_bfclamp_single_x2", [IsStreaming], []>; + def SVBFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "b", MergeNone, "aarch64_sve_bfclamp_single_x4", [IsStreaming], []>; +} + let TargetGuard = "sme2" in { // == ADD (vectors) == def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>; @@ -2265,6 +2270,10 @@ let TargetGuard = "sme2" in { def SVCVT_S32_F32_X4 : SInst<"svcvt_{d}[_f32_x4]", "4.d4.M", "i", MergeNone, "aarch64_sve_fcvtzs_x4", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>; } +let TargetGuard = "sme-f16f16" in { + def SVCVT_F32_X2 : SInst<"svcvt_{d}[_f16_x2]", "2h", "f", MergeNone, "aarch64_sve_fcvt_widen_x2", [ IsStreaming],[]>; +} + // // Multi-vector floating-point convert from single-precision to interleaved half-precision/BFloat16 // @@ -2273,6 +2282,13 @@ let TargetGuard = "sme2" in { def SVCVTN_BF16_X2 : SInst<"svcvtn_bf16[_f32_x2]", "$2", "f", MergeNone, "aarch64_sve_bfcvtn_x2", [IsOverloadNone, IsStreaming],[]>; } +// +//Multi-vector floating-point convert from half-precision to deinterleaved single-precision. 
+// +let TargetGuard = "sme-f16f16" in { + def SVCVTL_F32_X2 : SInst<"svcvtl_f32[_f16_x2]", "2h", "f", MergeNone, "aarch64_sve_fcvtl_widen_x2", [ IsStreaming],[]>; +} + // // Multi-vector saturating extract narrow // diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index de2f245fb29f8..4119e69c85540 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6277,11 +6277,9 @@ def mapx_features_EQ : CommaJoined<["-"], "mapx-features=">, Group, Values<"egpr,push2pop2,ppx,ndd,ccmp,nf,cf">; def mno_apx_features_EQ : CommaJoined<["-"], "mno-apx-features=">, Group, HelpText<"Disable features of APX">, Values<"egpr,push2pop2,ppx,ndd,ccmp,nf,cf">; -// Features egpr, push2pop2, ppx and ndd are validated with llvm-test-suite && cpu2017 on Intel SDE. -// For stability, we turn on these features only for -mapxf. After a feature pass the validation, -// we will add it to -mapxf. -def mapxf : Flag<["-"], "mapxf">, Alias, AliasArgs<["egpr","push2pop2","ppx", "ndd"]>; -def mno_apxf : Flag<["-"], "mno-apxf">, Alias, AliasArgs<["egpr","push2pop2","ppx","ndd"]>; +// For stability, we only add a feature to -mapxf after it passes the validation of llvm-test-suite && cpu2017 on Intel SDE. 
+def mapxf : Flag<["-"], "mapxf">, Alias, AliasArgs<["egpr","push2pop2","ppx","ndd","ccmp","nf"]>; +def mno_apxf : Flag<["-"], "mno-apxf">, Alias, AliasArgs<["egpr","push2pop2","ppx","ndd","ccmp","nf"]>; } // let Flags = [TargetSpecific] // VE feature flags diff --git a/clang/include/clang/ExtractAPI/ExtractAPIVisitor.h b/clang/include/clang/ExtractAPI/ExtractAPIVisitor.h index 8ccebe457ed53..76d7fd798bed3 100644 --- a/clang/include/clang/ExtractAPI/ExtractAPIVisitor.h +++ b/clang/include/clang/ExtractAPI/ExtractAPIVisitor.h @@ -21,6 +21,7 @@ #include "clang/AST/DeclTemplate.h" #include "clang/AST/ParentMapContext.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Basic/LLVM.h" #include "clang/Basic/Module.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/Specifiers.h" @@ -127,7 +128,7 @@ class ExtractAPIVisitorBase : public RecursiveASTVisitor { protected: /// Collect API information for the enum constants and associate with the /// parent enum. - void recordEnumConstants(EnumRecord *EnumRecord, + void recordEnumConstants(SymbolReference Container, const EnumDecl::enumerator_range Constants); /// Collect API information for the Objective-C methods and associate with the @@ -248,12 +249,8 @@ class ExtractAPIVisitorBase : public RecursiveASTVisitor { clang::index::generateUSRForDecl(Tag, TagUSR); if (auto *Record = llvm::dyn_cast_if_present( API.findRecordForUSR(TagUSR))) { - if (Record->IsEmbeddedInVarDeclarator) { + if (Record->IsEmbeddedInVarDeclarator) NewRecordContext->stealRecordChain(*Record); - auto *NewRecord = cast(NewRecordContext); - if (NewRecord->Comment.empty()) - NewRecord->Comment = Record->Comment; - } } } }; @@ -394,17 +391,6 @@ bool ExtractAPIVisitorBase::VisitEnumDecl(const EnumDecl *Decl) { if (!getDerivedExtractAPIVisitor().shouldDeclBeIncluded(Decl)) return true; - SmallString<128> QualifiedNameBuffer; - // Collect symbol information. 
- StringRef Name = Decl->getName(); - if (Name.empty()) - Name = getTypedefName(Decl); - if (Name.empty()) { - llvm::raw_svector_ostream OS(QualifiedNameBuffer); - Decl->printQualifiedName(OS); - Name = QualifiedNameBuffer; - } - SmallString<128> USR; index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = @@ -420,13 +406,29 @@ bool ExtractAPIVisitorBase::VisitEnumDecl(const EnumDecl *Decl) { DeclarationFragmentsBuilder::getFragmentsForEnum(Decl); DeclarationFragments SubHeading = DeclarationFragmentsBuilder::getSubHeading(Decl); - auto *ER = API.createRecord( - USR, Name, createHierarchyInformationForDecl(*Decl), Loc, - AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, SubHeading, - isInSystemHeader(Decl), isEmbeddedInVarDeclarator(*Decl)); + + // Collect symbol information. + SymbolReference ParentContainer; + + if (Decl->hasNameForLinkage()) { + StringRef Name = Decl->getName(); + if (Name.empty()) + Name = getTypedefName(Decl); + + auto *ER = API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, + SubHeading, isInSystemHeader(Decl), false); + ParentContainer = SymbolReference(ER); + } else { + // If this is an anonymous enum then the parent scope of the constants is the + // top level namespace. + ParentContainer = {}; + } // Now collect information about the enumerators in this enum. - getDerivedExtractAPIVisitor().recordEnumConstants(ER, Decl->enumerators()); + getDerivedExtractAPIVisitor().recordEnumConstants(ParentContainer, + Decl->enumerators()); return true; } @@ -1197,7 +1199,7 @@ bool ExtractAPIVisitorBase::VisitObjCCategoryDecl( /// parent enum. template void ExtractAPIVisitorBase::recordEnumConstants( - EnumRecord *EnumRecord, const EnumDecl::enumerator_range Constants) { + SymbolReference Container, const EnumDecl::enumerator_range Constants) { for (const auto *Constant : Constants) { // Collect symbol information. 
StringRef Name = Constant->getName(); @@ -1218,9 +1220,8 @@ void ExtractAPIVisitorBase::recordEnumConstants( DeclarationFragmentsBuilder::getSubHeading(Constant); API.createRecord( - USR, Name, createHierarchyInformationForDecl(*Constant), Loc, - AvailabilityInfo::createFromDecl(Constant), Comment, Declaration, - SubHeading, isInSystemHeader(Constant)); + USR, Name, Container, Loc, AvailabilityInfo::createFromDecl(Constant), + Comment, Declaration, SubHeading, isInSystemHeader(Constant)); } } @@ -1469,7 +1470,17 @@ class ExtractAPIVisitor bool shouldDeclBeIncluded(const Decl *D) const { return true; } const RawComment *fetchRawCommentForDecl(const Decl *D) const { - return this->Context.getRawCommentForDeclNoCache(D); + if (const auto *Comment = this->Context.getRawCommentForDeclNoCache(D)) + return Comment; + + if (const auto *Declarator = dyn_cast(D)) { + const auto *TagTypeDecl = Declarator->getType()->getAsTagDecl(); + if (TagTypeDecl && TagTypeDecl->isEmbeddedInDeclarator() && + TagTypeDecl->isCompleteDefinition()) + return this->Context.getRawCommentForDeclNoCache(TagTypeDecl); + } + + return nullptr; } }; diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 8493026f5f7a6..d054b8cf0d240 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -467,15 +467,18 @@ class Parser : public CodeCompletionHandler { /// Flags describing a context in which we're parsing a statement. enum class ParsedStmtContext { + /// This context permits declarations in language modes where declarations + /// are not statements. + AllowDeclarationsInC = 0x1, /// This context permits standalone OpenMP directives. - AllowStandaloneOpenMPDirectives = 0x1, + AllowStandaloneOpenMPDirectives = 0x2, /// This context is at the top level of a GNU statement expression. - InStmtExpr = 0x2, + InStmtExpr = 0x4, /// The context of a regular substatement. SubStmt = 0, /// The context of a compound-statement. 
- Compound = AllowStandaloneOpenMPDirectives, + Compound = AllowDeclarationsInC | AllowStandaloneOpenMPDirectives, LLVM_MARK_AS_BITMASK_ENUM(InStmtExpr) }; @@ -3656,6 +3659,7 @@ class Parser : public CodeCompletionHandler { struct OpenACCDirectiveParseInfo { OpenACCDirectiveKind DirKind; SourceLocation StartLoc; + SourceLocation DirLoc; SourceLocation EndLoc; SmallVector Clauses; // TODO OpenACC: As we implement support for the Atomic, Routine, Cache, and diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h index 6f69fa08939b8..66144de4340a8 100644 --- a/clang/include/clang/Sema/SemaOpenACC.h +++ b/clang/include/clang/Sema/SemaOpenACC.h @@ -379,7 +379,7 @@ class SemaOpenACC : public SemaBase { /// Called after the construct has been parsed, but clauses haven't been /// parsed. This allows us to diagnose not-implemented, as well as set up any /// state required for parsing the clauses. - void ActOnConstruct(OpenACCDirectiveKind K, SourceLocation StartLoc); + void ActOnConstruct(OpenACCDirectiveKind K, SourceLocation DirLoc); /// Called after the directive, including its clauses, have been parsed and /// parsing has consumed the 'annot_pragma_openacc_end' token. This DOES @@ -400,6 +400,7 @@ class SemaOpenACC : public SemaBase { /// declaration group or associated statement. StmtResult ActOnEndStmtDirective(OpenACCDirectiveKind K, SourceLocation StartLoc, + SourceLocation DirLoc, SourceLocation EndLoc, ArrayRef Clauses, StmtResult AssocStmt); diff --git a/clang/lib/AST/APValue.cpp b/clang/lib/AST/APValue.cpp index 8c77b563657d9..d8e33ff421c06 100644 --- a/clang/lib/AST/APValue.cpp +++ b/clang/lib/AST/APValue.cpp @@ -90,7 +90,7 @@ QualType APValue::LValueBase::getType() const { // For a materialized temporary, the type of the temporary we materialized // may not be the type of the expression. 
if (const MaterializeTemporaryExpr *MTE = - clang::dyn_cast(Base)) { + llvm::dyn_cast(Base)) { SmallVector CommaLHSs; SmallVector Adjustments; const Expr *Temp = MTE->getSubExpr(); diff --git a/clang/lib/AST/StmtOpenACC.cpp b/clang/lib/AST/StmtOpenACC.cpp index a381a8dd7b62c..47899b344c97a 100644 --- a/clang/lib/AST/StmtOpenACC.cpp +++ b/clang/lib/AST/StmtOpenACC.cpp @@ -23,15 +23,14 @@ OpenACCComputeConstruct::CreateEmpty(const ASTContext &C, unsigned NumClauses) { return Inst; } -OpenACCComputeConstruct * -OpenACCComputeConstruct::Create(const ASTContext &C, OpenACCDirectiveKind K, - SourceLocation BeginLoc, SourceLocation EndLoc, - ArrayRef Clauses, - Stmt *StructuredBlock) { +OpenACCComputeConstruct *OpenACCComputeConstruct::Create( + const ASTContext &C, OpenACCDirectiveKind K, SourceLocation BeginLoc, + SourceLocation DirLoc, SourceLocation EndLoc, + ArrayRef Clauses, Stmt *StructuredBlock) { void *Mem = C.Allocate( OpenACCComputeConstruct::totalSizeToAlloc( Clauses.size())); - auto *Inst = new (Mem) - OpenACCComputeConstruct(K, BeginLoc, EndLoc, Clauses, StructuredBlock); + auto *Inst = new (Mem) OpenACCComputeConstruct(K, BeginLoc, DirLoc, EndLoc, + Clauses, StructuredBlock); return Inst; } diff --git a/clang/lib/AST/TemplateBase.cpp b/clang/lib/AST/TemplateBase.cpp index a7ee973b7f7d0..b50daf5fbed6a 100644 --- a/clang/lib/AST/TemplateBase.cpp +++ b/clang/lib/AST/TemplateBase.cpp @@ -221,8 +221,13 @@ static const ValueDecl *getAsSimpleValueDeclRef(const ASTContext &Ctx, // We model class non-type template parameters as their template parameter // object declaration. - if (V.isStruct() || V.isUnion()) + if (V.isStruct() || V.isUnion()) { + // Dependent types are not supposed to be described as + // TemplateParamObjectDecls. + if (T->isDependentType() || T->isInstantiationDependentType()) + return nullptr; return Ctx.getTemplateParamObjectDecl(T, V); + } // Pointers and references with an empty path use the special 'Declaration' // representation. 
diff --git a/clang/lib/Analysis/MacroExpansionContext.cpp b/clang/lib/Analysis/MacroExpansionContext.cpp index 564e359668a51..b212b7f245792 100644 --- a/clang/lib/Analysis/MacroExpansionContext.cpp +++ b/clang/lib/Analysis/MacroExpansionContext.cpp @@ -12,7 +12,7 @@ #define DEBUG_TYPE "macro-expansion-context" -static void dumpTokenInto(const clang::Preprocessor &PP, clang::raw_ostream &OS, +static void dumpTokenInto(const clang::Preprocessor &PP, llvm::raw_ostream &OS, clang::Token Tok); namespace clang { diff --git a/clang/lib/Basic/CharInfo.cpp b/clang/lib/Basic/CharInfo.cpp index d02054c9718f5..26d693b8e9b94 100644 --- a/clang/lib/Basic/CharInfo.cpp +++ b/clang/lib/Basic/CharInfo.cpp @@ -31,20 +31,20 @@ const uint16_t clang::charinfo::InfoTable[256] = { 0 , 0 , 0 , 0 , //32 SP 33 ! 34 " 35 # //36 $ 37 % 38 & 39 ' - CHAR_SPACE , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , - CHAR_PUNCT , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , + CHAR_SPACE , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , + CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , //40 ( 41 ) 42 * 43 + //44 , 45 - 46 . 47 / - CHAR_PUNCT , CHAR_PUNCT , CHAR_RAWDEL , CHAR_RAWDEL , - CHAR_RAWDEL , CHAR_RAWDEL , CHAR_PERIOD , CHAR_RAWDEL , + CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , + CHAR_PUNCT , CHAR_PUNCT , CHAR_PERIOD , CHAR_PUNCT , //48 0 49 1 50 2 51 3 //52 4 53 5 54 6 55 7 CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , //56 8 57 9 58 : 59 ; //60 < 61 = 62 > 63 ? 
- CHAR_DIGIT , CHAR_DIGIT , CHAR_RAWDEL , CHAR_RAWDEL , - CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , + CHAR_DIGIT , CHAR_DIGIT , CHAR_PUNCT , CHAR_PUNCT , + CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , //64 @ 65 A 66 B 67 C //68 D 69 E 70 F 71 G CHAR_PUNCT , CHAR_XUPPER , CHAR_XUPPER , CHAR_XUPPER , @@ -59,8 +59,8 @@ const uint16_t clang::charinfo::InfoTable[256] = { CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , //88 X 89 Y 90 Z 91 [ //92 \ 93 ] 94 ^ 95 _ - CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_RAWDEL , - CHAR_PUNCT , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_UNDER , + CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_PUNCT , + CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_UNDER , //96 ` 97 a 98 b 99 c //100 d 101 e 102 f 103 g CHAR_PUNCT , CHAR_XLOWER , CHAR_XLOWER , CHAR_XLOWER , @@ -75,6 +75,6 @@ const uint16_t clang::charinfo::InfoTable[256] = { CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , //120 x 121 y 122 z 123 { //124 | 125 } 126 ~ 127 DEL - CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_RAWDEL , - CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , 0 + CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_PUNCT , + CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , 0 }; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 3a30cff917bb4..08e44360bfbe3 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -961,7 +961,7 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, if (HasCF) Builder.defineMacro("__CF__"); // Condition here is aligned with the feature set of mapxf in Options.td - if (HasEGPR && HasPush2Pop2 && HasPPX && HasNDD) + if (HasEGPR && HasPush2Pop2 && HasPPX && HasNDD && HasCCMP && HasNF) Builder.defineMacro("__APX_F__"); // Each case falls through to the previous one here. 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 5edf8c7970913..266bf41fd5577 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14074,7 +14074,7 @@ Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { // Grab the appropriate field from __cpu_model. llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, Index)}; - llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs); + llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs); CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue, CharUnits::fromQuantity(4)); @@ -14116,7 +14116,7 @@ CodeGenFunction::EmitX86CpuSupports(std::array FeatureMask) { // global in the struct STy. Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3), Builder.getInt32(0)}; - Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); + Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs); Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures, CharUnits::fromQuantity(4)); @@ -14137,7 +14137,7 @@ CodeGenFunction::EmitX86CpuSupports(std::array FeatureMask) { continue; Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)}; Value *Features = Builder.CreateAlignedLoad( - Int32Ty, Builder.CreateGEP(ATy, CpuFeatures2, Idxs), + Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs), CharUnits::fromQuantity(4)); // Check the value of the bit corresponding to the feature requested. 
Value *Mask = Builder.getInt32(M); @@ -16724,7 +16724,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, FieldIdx)}; - FieldValue = Builder.CreateGEP(STy, SysConf, Idxs); + FieldValue = Builder.CreateInBoundsGEP(STy, SysConf, Idxs); FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue, CharUnits::fromQuantity(4)); } else if (SupportMethod == SYS_CALL) { @@ -20806,6 +20806,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, } case WebAssembly::BI__builtin_wasm_min_f32: case WebAssembly::BI__builtin_wasm_min_f64: + case WebAssembly::BI__builtin_wasm_min_f16x8: case WebAssembly::BI__builtin_wasm_min_f32x4: case WebAssembly::BI__builtin_wasm_min_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); @@ -20816,6 +20817,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, } case WebAssembly::BI__builtin_wasm_max_f32: case WebAssembly::BI__builtin_wasm_max_f64: + case WebAssembly::BI__builtin_wasm_max_f16x8: case WebAssembly::BI__builtin_wasm_max_f32x4: case WebAssembly::BI__builtin_wasm_max_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); @@ -20824,6 +20826,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } + case WebAssembly::BI__builtin_wasm_pmin_f16x8: case WebAssembly::BI__builtin_wasm_pmin_f32x4: case WebAssembly::BI__builtin_wasm_pmin_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); @@ -20832,6 +20835,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } + case WebAssembly::BI__builtin_wasm_pmax_f16x8: case WebAssembly::BI__builtin_wasm_pmax_f32x4: case WebAssembly::BI__builtin_wasm_pmax_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); diff 
--git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index bba00257fd4f0..7a92fc3dfb4a4 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -1789,7 +1789,6 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( // Push a destructor if necessary. // FIXME: if we have an array of structures, all explicitly // initialized, we can end up pushing a linear number of cleanups. - bool pushedCleanup = false; if (QualType::DestructionKind dtorKind = field->getType().isDestructedType()) { assert(LV.isSimple()); @@ -1797,17 +1796,8 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( CGF.pushDestroyAndDeferDeactivation(NormalAndEHCleanup, LV.getAddress(), field->getType(), CGF.getDestroyer(dtorKind), false); - pushedCleanup = true; } } - - // If the GEP didn't get used because of a dead zero init or something - // else, clean it up for -O0 builds and general tidiness. - if (!pushedCleanup && LV.isSimple()) - if (llvm::GetElementPtrInst *GEP = - dyn_cast(LV.emitRawPointer(CGF))) - if (GEP->use_empty()) - GEP->eraseFromParent(); } } diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index e4774a587707a..0b0b659e1fd49 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -5341,6 +5341,18 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, !IsDefinitionAvailableExternally && D->needsDestruction(getContext()) == QualType::DK_cxx_destructor; + // It is pointless to emit the definition for an available_externally variable + // which can't be marked as const. + // We don't need to check if it needs global ctor or dtor. See the above + // comment for ideas. + if (IsDefinitionAvailableExternally && + (!D->hasConstantInitialization() || + // TODO: Update this when we have interface to check constexpr + // destructor.
+ D->needsDestruction(getContext()) || + !D->getType().isConstantStorage(getContext(), true, true))) + return; + const VarDecl *InitDecl; const Expr *InitExpr = D->getAnyInitializer(InitDecl); diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 76704c4d7be4a..db8e6f55302ad 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -1340,7 +1340,7 @@ void CodeGenPGO::setProfileVersion(llvm::Module &M) { llvm::APInt(64, ProfileVersion)), VarName); - IRLevelVersionVariable->setVisibility(llvm::GlobalValue::DefaultVisibility); + IRLevelVersionVariable->setVisibility(llvm::GlobalValue::HiddenVisibility); llvm::Triple TT(M.getTargetTriple()); if (TT.supportsCOMDAT()) { IRLevelVersionVariable->setLinkage(llvm::GlobalValue::ExternalLinkage); diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 9849c59685cca..b141e5f2adfab 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2227,10 +2227,19 @@ void Generic_GCC::GCCInstallationDetector::init( SmallVector CandidateBiarchTripleAliases; // Add some triples that we want to check first. 
CandidateTripleAliases.push_back(TargetTriple.str()); - std::string TripleNoVendor = TargetTriple.getArchName().str() + "-" + - TargetTriple.getOSAndEnvironmentName().str(); - if (TargetTriple.getVendor() == llvm::Triple::UnknownVendor) + std::string TripleNoVendor, BiarchTripleNoVendor; + if (TargetTriple.getVendor() == llvm::Triple::UnknownVendor) { + StringRef OSEnv = TargetTriple.getOSAndEnvironmentName(); + if (TargetTriple.getEnvironment() == llvm::Triple::GNUX32) + OSEnv = "linux-gnu"; + TripleNoVendor = (TargetTriple.getArchName().str() + '-' + OSEnv).str(); CandidateTripleAliases.push_back(TripleNoVendor); + if (BiarchVariantTriple.getArch() != llvm::Triple::UnknownArch) { + BiarchTripleNoVendor = + (BiarchVariantTriple.getArchName().str() + '-' + OSEnv).str(); + CandidateBiarchTripleAliases.push_back(BiarchTripleNoVendor); + } + } CollectLibDirsAndTriples(TargetTriple, BiarchVariantTriple, CandidateLibDirs, CandidateTripleAliases, CandidateBiarchLibDirs, @@ -2453,11 +2462,9 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( // lists should shrink over time. Please don't add more elements to *Triples. 
static const char *const AArch64LibDirs[] = {"/lib64", "/lib"}; static const char *const AArch64Triples[] = { - "aarch64-none-linux-gnu", "aarch64-linux-gnu", "aarch64-redhat-linux", - "aarch64-suse-linux"}; + "aarch64-none-linux-gnu", "aarch64-redhat-linux", "aarch64-suse-linux"}; static const char *const AArch64beLibDirs[] = {"/lib"}; - static const char *const AArch64beTriples[] = {"aarch64_be-none-linux-gnu", - "aarch64_be-linux-gnu"}; + static const char *const AArch64beTriples[] = {"aarch64_be-none-linux-gnu"}; static const char *const ARMLibDirs[] = {"/lib"}; static const char *const ARMTriples[] = {"arm-linux-gnueabi"}; @@ -2482,9 +2489,8 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( "x86_64-linux-gnu", "x86_64-unknown-linux-gnu", "x86_64-pc-linux-gnu", "x86_64-redhat-linux6E", "x86_64-redhat-linux", "x86_64-suse-linux", - "x86_64-manbo-linux-gnu", "x86_64-linux-gnu", - "x86_64-slackware-linux", "x86_64-unknown-linux", - "x86_64-amazon-linux"}; + "x86_64-manbo-linux-gnu", "x86_64-slackware-linux", + "x86_64-unknown-linux", "x86_64-amazon-linux"}; static const char *const X32Triples[] = {"x86_64-linux-gnux32", "x86_64-pc-linux-gnux32"}; static const char *const X32LibDirs[] = {"/libx32", "/lib"}; @@ -2500,26 +2506,24 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( "loongarch64-linux-gnu", "loongarch64-unknown-linux-gnu"}; static const char *const M68kLibDirs[] = {"/lib"}; - static const char *const M68kTriples[] = { - "m68k-linux-gnu", "m68k-unknown-linux-gnu", "m68k-suse-linux"}; + static const char *const M68kTriples[] = {"m68k-unknown-linux-gnu", + "m68k-suse-linux"}; static const char *const MIPSLibDirs[] = {"/libo32", "/lib"}; static const char *const MIPSTriples[] = { "mips-linux-gnu", "mips-mti-linux", "mips-mti-linux-gnu", "mips-img-linux-gnu", "mipsisa32r6-linux-gnu"}; static const char *const MIPSELLibDirs[] = {"/libo32", "/lib"}; - static const char *const MIPSELTriples[] = { - "mipsel-linux-gnu", 
"mips-img-linux-gnu", "mipsisa32r6el-linux-gnu"}; + static const char *const MIPSELTriples[] = {"mipsel-linux-gnu", + "mips-img-linux-gnu"}; static const char *const MIPS64LibDirs[] = {"/lib64", "/lib"}; static const char *const MIPS64Triples[] = { - "mips64-linux-gnu", "mips-mti-linux-gnu", - "mips-img-linux-gnu", "mips64-linux-gnuabi64", + "mips-mti-linux-gnu", "mips-img-linux-gnu", "mips64-linux-gnuabi64", "mipsisa64r6-linux-gnu", "mipsisa64r6-linux-gnuabi64"}; static const char *const MIPS64ELLibDirs[] = {"/lib64", "/lib"}; static const char *const MIPS64ELTriples[] = { - "mips64el-linux-gnu", "mips-mti-linux-gnu", - "mips-img-linux-gnu", "mips64el-linux-gnuabi64", + "mips-mti-linux-gnu", "mips-img-linux-gnu", "mips64el-linux-gnuabi64", "mipsisa64r6el-linux-gnu", "mipsisa64r6el-linux-gnuabi64"}; static const char *const MIPSN32LibDirs[] = {"/lib32"}; @@ -2534,46 +2538,39 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( static const char *const PPCLibDirs[] = {"/lib32", "/lib"}; static const char *const PPCTriples[] = { - "powerpc-linux-gnu", "powerpc-unknown-linux-gnu", "powerpc-linux-gnuspe", + "powerpc-unknown-linux-gnu", // On 32-bit PowerPC systems running SUSE Linux, gcc is configured as a // 64-bit compiler which defaults to "-m32", hence "powerpc64-suse-linux". 
"powerpc64-suse-linux", "powerpc-montavista-linuxspe"}; static const char *const PPCLELibDirs[] = {"/lib32", "/lib"}; - static const char *const PPCLETriples[] = {"powerpcle-linux-gnu", - "powerpcle-unknown-linux-gnu", + static const char *const PPCLETriples[] = {"powerpcle-unknown-linux-gnu", "powerpcle-linux-musl"}; static const char *const PPC64LibDirs[] = {"/lib64", "/lib"}; - static const char *const PPC64Triples[] = { - "powerpc64-linux-gnu", "powerpc64-unknown-linux-gnu", - "powerpc64-suse-linux", "ppc64-redhat-linux"}; + static const char *const PPC64Triples[] = {"powerpc64-unknown-linux-gnu", + "powerpc64-suse-linux", + "ppc64-redhat-linux"}; static const char *const PPC64LELibDirs[] = {"/lib64", "/lib"}; static const char *const PPC64LETriples[] = { - "powerpc64le-linux-gnu", "powerpc64le-unknown-linux-gnu", - "powerpc64le-none-linux-gnu", "powerpc64le-suse-linux", - "ppc64le-redhat-linux"}; + "powerpc64le-unknown-linux-gnu", "powerpc64le-none-linux-gnu", + "powerpc64le-suse-linux", "ppc64le-redhat-linux"}; static const char *const RISCV32LibDirs[] = {"/lib32", "/lib"}; static const char *const RISCV32Triples[] = {"riscv32-unknown-linux-gnu", - "riscv32-linux-gnu", "riscv32-unknown-elf"}; static const char *const RISCV64LibDirs[] = {"/lib64", "/lib"}; static const char *const RISCV64Triples[] = {"riscv64-unknown-linux-gnu", - "riscv64-linux-gnu", "riscv64-unknown-elf"}; static const char *const SPARCv8LibDirs[] = {"/lib32", "/lib"}; - static const char *const SPARCv8Triples[] = {"sparc-linux-gnu", - "sparcv8-linux-gnu"}; + static const char *const SPARCv8Triples[] = {"sparcv8-linux-gnu"}; static const char *const SPARCv9LibDirs[] = {"/lib64", "/lib"}; - static const char *const SPARCv9Triples[] = {"sparc64-linux-gnu", - "sparcv9-linux-gnu"}; + static const char *const SPARCv9Triples[] = {"sparcv9-linux-gnu"}; static const char *const SystemZLibDirs[] = {"/lib64", "/lib"}; static const char *const SystemZTriples[] = { - "s390x-linux-gnu", 
"s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu", - "s390x-suse-linux", "s390x-redhat-linux"}; - + "s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu", "s390x-suse-linux", + "s390x-redhat-linux"}; using std::begin; using std::end; diff --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp index b1ff697b368b1..f32a23f111e4b 100644 --- a/clang/lib/Driver/ToolChains/HIPUtility.cpp +++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp @@ -106,9 +106,9 @@ class HIPUndefinedFatBinSymbols { std::string ID = IA->getId().str(); if (!ID.empty()) { ID = llvm::utohexstr(llvm::MD5Hash(ID), /*LowerCase=*/true); - FatBinSymbols.insert(Twine(FatBinPrefix + "_" + ID).str()); + FatBinSymbols.insert((FatBinPrefix + Twine('_') + ID).str()); GPUBinHandleSymbols.insert( - Twine(GPUBinHandlePrefix + "_" + ID).str()); + (GPUBinHandlePrefix + Twine('_') + ID).str()); continue; } if (IA->getInputArg().getNumValues() == 0) diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index c98645993abe0..c7543a48c0b50 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2261,8 +2261,17 @@ bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr, unsigned PrefixLen = 0; - while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen])) + while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen])) { ++PrefixLen; + if (!isLexingRawMode() && + llvm::is_contained({'$', '@', '`'}, CurPtr[PrefixLen])) { + const char *Pos = &CurPtr[PrefixLen]; + Diag(Pos, LangOpts.CPlusPlus26 + ? diag::warn_cxx26_compat_raw_string_literal_character_set + : diag::ext_cxx26_raw_string_literal_character_set) + << StringRef(Pos, 1); + } + } // If the last character was not a '(', then we didn't lex a valid delimiter. 
if (CurPtr[PrefixLen] != '(') { diff --git a/clang/lib/Parse/ParseOpenACC.cpp b/clang/lib/Parse/ParseOpenACC.cpp index e9c60f76165b6..63afc18783a1f 100644 --- a/clang/lib/Parse/ParseOpenACC.cpp +++ b/clang/lib/Parse/ParseOpenACC.cpp @@ -1347,11 +1347,13 @@ void Parser::ParseOpenACCCacheVarList() { ParseOpenACCVarList(OpenACCClauseKind::Invalid); } -Parser::OpenACCDirectiveParseInfo Parser::ParseOpenACCDirective() { - SourceLocation StartLoc = getCurToken().getLocation(); +Parser::OpenACCDirectiveParseInfo +Parser::ParseOpenACCDirective() { + SourceLocation StartLoc = ConsumeAnnotationToken(); + SourceLocation DirLoc = getCurToken().getLocation(); OpenACCDirectiveKind DirKind = ParseOpenACCDirectiveKind(*this); - getActions().OpenACC().ActOnConstruct(DirKind, StartLoc); + getActions().OpenACC().ActOnConstruct(DirKind, DirLoc); // Once we've parsed the construct/directive name, some have additional // specifiers that need to be taken care of. Atomic has an 'atomic-clause' @@ -1390,7 +1392,7 @@ Parser::OpenACCDirectiveParseInfo Parser::ParseOpenACCDirective() { break; case OpenACCDirectiveKind::Wait: // OpenACC has an optional paren-wrapped 'wait-argument'. - if (ParseOpenACCWaitArgument(StartLoc, /*IsDirective=*/true).Failed) + if (ParseOpenACCWaitArgument(DirLoc, /*IsDirective=*/true).Failed) T.skipToEnd(); else T.consumeClose(); @@ -1404,7 +1406,8 @@ Parser::OpenACCDirectiveParseInfo Parser::ParseOpenACCDirective() { } // Parses the list of clauses, if present, plus set up return value. 
- OpenACCDirectiveParseInfo ParseInfo{DirKind, StartLoc, SourceLocation{}, + OpenACCDirectiveParseInfo ParseInfo{DirKind, StartLoc, DirLoc, + SourceLocation{}, ParseOpenACCClauseList(DirKind)}; assert(Tok.is(tok::annot_pragma_openacc_end) && @@ -1421,7 +1424,6 @@ Parser::DeclGroupPtrTy Parser::ParseOpenACCDirectiveDecl() { assert(Tok.is(tok::annot_pragma_openacc) && "expected OpenACC Start Token"); ParsingOpenACCDirectiveRAII DirScope(*this); - ConsumeAnnotationToken(); OpenACCDirectiveParseInfo DirInfo = ParseOpenACCDirective(); @@ -1438,7 +1440,6 @@ StmtResult Parser::ParseOpenACCDirectiveStmt() { assert(Tok.is(tok::annot_pragma_openacc) && "expected OpenACC Start Token"); ParsingOpenACCDirectiveRAII DirScope(*this); - ConsumeAnnotationToken(); OpenACCDirectiveParseInfo DirInfo = ParseOpenACCDirective(); if (getActions().OpenACC().ActOnStartStmtDirective(DirInfo.DirKind, @@ -1456,6 +1457,6 @@ StmtResult Parser::ParseOpenACCDirectiveStmt() { } return getActions().OpenACC().ActOnEndStmtDirective( - DirInfo.DirKind, DirInfo.StartLoc, DirInfo.EndLoc, DirInfo.Clauses, - AssocStmt); + DirInfo.DirKind, DirInfo.StartLoc, DirInfo.DirLoc, DirInfo.EndLoc, + DirInfo.Clauses, AssocStmt); } diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index e959dd6378f46..cd8df3332724f 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -25,7 +25,6 @@ #include "clang/Sema/SemaOpenMP.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/UniqueVector.h" #include "llvm/Frontend/OpenMP/OMPAssume.h" #include "llvm/Frontend/OpenMP/OMPContext.h" #include diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp index b0af04451166c..c25203243ee49 100644 --- a/clang/lib/Parse/ParseStmt.cpp +++ b/clang/lib/Parse/ParseStmt.cpp @@ -239,7 +239,15 @@ StmtResult Parser::ParseStatementOrDeclarationAfterAttributes( auto IsStmtAttr = [](ParsedAttr &Attr) { return Attr.isStmtAttr(); }; 
bool AllAttrsAreStmtAttrs = llvm::all_of(CXX11Attrs, IsStmtAttr) && llvm::all_of(GNUAttrs, IsStmtAttr); - if (((GNUAttributeLoc.isValid() && !(HaveAttrs && AllAttrsAreStmtAttrs)) || + // In C, the grammar production for statement (C23 6.8.1p1) does not allow + // for declarations, which is different from C++ (C++23 [stmt.pre]p1). So + // in C++, we always allow a declaration, but in C we need to check whether + // we're in a statement context that allows declarations. e.g., in C, the + // following is invalid: if (1) int x; + if ((getLangOpts().CPlusPlus || getLangOpts().MicrosoftExt || + (StmtCtx & ParsedStmtContext::AllowDeclarationsInC) != + ParsedStmtContext()) && + ((GNUAttributeLoc.isValid() && !(HaveAttrs && AllAttrsAreStmtAttrs)) || isDeclarationStatement())) { SourceLocation DeclStart = Tok.getLocation(), DeclEnd; DeclGroupPtrTy Decl; diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index d3e9dcb4f4399..6595abbcdda5b 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -1444,10 +1444,10 @@ bool Sema::CheckCXXThisType(SourceLocation Loc, QualType Type) { // category are defined within such member functions as they are within // an implicit object member function). 
DeclContext *DC = getFunctionLevelDeclContext(); - if (const auto *Method = dyn_cast(DC); - Method && Method->isExplicitObjectMemberFunction()) { + const auto *Method = dyn_cast(DC); + if (Method && Method->isExplicitObjectMemberFunction()) { Diag(Loc, diag::err_invalid_this_use) << 1; - } else if (isLambdaCallWithExplicitObjectParameter(CurContext)) { + } else if (Method && isLambdaCallWithExplicitObjectParameter(CurContext)) { Diag(Loc, diag::err_invalid_this_use) << 1; } else { Diag(Loc, diag::err_invalid_this_use) << 0; diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index ef0a655b631ab..be6ea20a956a3 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -5897,6 +5897,16 @@ void Sema::diagnoseTypo(const TypoCorrection &Correction, NamedDecl *ChosenDecl = Correction.isKeyword() ? nullptr : Correction.getFoundDecl(); + + // For builtin functions which aren't declared anywhere in source, + // don't emit the "declared here" note. + if (const auto *FD = dyn_cast_if_present(ChosenDecl); + FD && FD->getBuiltinID() && + PrevNote.getDiagID() == diag::note_previous_decl && + Correction.getCorrectionRange().getBegin() == FD->getBeginLoc()) { + ChosenDecl = nullptr; + } + if (PrevNote.getDiagID() && ChosenDecl) Diag(ChosenDecl->getLocation(), PrevNote) << CorrectedQuotedStr << (ErrorRecovery ? FixItHint() : FixTypo); diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index 09d91b31cfe5f..15239f4f35c39 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -844,7 +844,7 @@ ExprResult SemaOpenACC::CheckReductionVar(Expr *VarExpr) { } void SemaOpenACC::ActOnConstruct(OpenACCDirectiveKind K, - SourceLocation StartLoc) { + SourceLocation DirLoc) { switch (K) { case OpenACCDirectiveKind::Invalid: // Nothing to do here, an invalid kind has nothing we can check here. 
We @@ -859,7 +859,7 @@ void SemaOpenACC::ActOnConstruct(OpenACCDirectiveKind K, // here as these constructs do not take any arguments. break; default: - Diag(StartLoc, diag::warn_acc_construct_unimplemented) << K; + Diag(DirLoc, diag::warn_acc_construct_unimplemented) << K; break; } } @@ -1265,6 +1265,7 @@ bool SemaOpenACC::ActOnStartStmtDirective(OpenACCDirectiveKind K, StmtResult SemaOpenACC::ActOnEndStmtDirective(OpenACCDirectiveKind K, SourceLocation StartLoc, + SourceLocation DirLoc, SourceLocation EndLoc, ArrayRef Clauses, StmtResult AssocStmt) { @@ -1278,7 +1279,7 @@ StmtResult SemaOpenACC::ActOnEndStmtDirective(OpenACCDirectiveKind K, case OpenACCDirectiveKind::Kernels: // TODO OpenACC: Add clauses to the construct here. return OpenACCComputeConstruct::Create( - getASTContext(), K, StartLoc, EndLoc, Clauses, + getASTContext(), K, StartLoc, DirLoc, EndLoc, Clauses, AssocStmt.isUsable() ? AssocStmt.get() : nullptr); } llvm_unreachable("Unhandled case in directive handling?"); diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp index 8735d96c84079..6f538ed55cb72 100644 --- a/clang/lib/Sema/SemaStmtAttr.cpp +++ b/clang/lib/Sema/SemaStmtAttr.cpp @@ -285,7 +285,7 @@ bool Sema::CheckAlwaysInlineAttr(const Stmt *OrigSt, const Stmt *CurSt, static Attr *handleNoInlineAttr(Sema &S, Stmt *St, const ParsedAttr &A, SourceRange Range) { NoInlineAttr NIA(S.Context, A); - if (!NIA.isClangNoInline()) { + if (!NIA.isStmtNoInline()) { S.Diag(St->getBeginLoc(), diag::warn_function_attribute_ignored_in_stmt) << "[[clang::noinline]]"; return nullptr; @@ -684,10 +684,8 @@ ExprResult Sema::ActOnCXXAssumeAttr(Stmt *St, const ParsedAttr &A, } if (!getLangOpts().CPlusPlus23 && - A.getSyntax() == AttributeCommonInfo::AS_CXX11) { - llvm::dbgs() << "Syntax: " << int(A.getSyntax()) << "\n"; + A.getSyntax() == AttributeCommonInfo::AS_CXX11) Diag(A.getLoc(), diag::ext_cxx23_attr) << A << Range; - } return Assumption; } diff --git a/clang/lib/Sema/TreeTransform.h 
b/clang/lib/Sema/TreeTransform.h index dee335b526991..765e6177d202d 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -4033,11 +4033,12 @@ class TreeTransform { StmtResult RebuildOpenACCComputeConstruct(OpenACCDirectiveKind K, SourceLocation BeginLoc, + SourceLocation DirLoc, SourceLocation EndLoc, ArrayRef Clauses, StmtResult StrBlock) { - return getSema().OpenACC().ActOnEndStmtDirective(K, BeginLoc, EndLoc, - Clauses, StrBlock); + return getSema().OpenACC().ActOnEndStmtDirective(K, BeginLoc, DirLoc, + EndLoc, Clauses, StrBlock); } private: @@ -11559,8 +11560,8 @@ StmtResult TreeTransform::TransformOpenACCComputeConstruct( getSema().OpenACC().ActOnAssociatedStmt(C->getDirectiveKind(), StrBlock); return getDerived().RebuildOpenACCComputeConstruct( - C->getDirectiveKind(), C->getBeginLoc(), C->getEndLoc(), - TransformedClauses, StrBlock); + C->getDirectiveKind(), C->getBeginLoc(), C->getDirectiveLoc(), + C->getEndLoc(), TransformedClauses, StrBlock); } //===----------------------------------------------------------------------===// diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index eac4faff28549..bea2b94989107 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2797,6 +2797,7 @@ void ASTStmtReader::VisitOpenACCConstructStmt(OpenACCConstructStmt *S) { (void)Record.readInt(); S->Kind = Record.readEnum(); S->Range = Record.readSourceRange(); + S->DirectiveLoc = Record.readSourceLocation(); Record.readOpenACCClauseList(S->Clauses); } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index dd548fabfd955..e830c4026ea78 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -7835,7 +7835,7 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) { case OpenACCClauseKind::If: { const auto *IC = cast(C); 
writeSourceLocation(IC->getLParenLoc()); - writeStmtRef(IC->getConditionExpr()); + AddStmt(const_cast(IC->getConditionExpr())); return; } case OpenACCClauseKind::Self: { @@ -7843,7 +7843,7 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) { writeSourceLocation(SC->getLParenLoc()); writeBool(SC->hasConditionExpr()); if (SC->hasConditionExpr()) - writeStmtRef(SC->getConditionExpr()); + AddStmt(const_cast(SC->getConditionExpr())); return; } case OpenACCClauseKind::NumGangs: { @@ -7857,13 +7857,13 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) { case OpenACCClauseKind::NumWorkers: { const auto *NWC = cast(C); writeSourceLocation(NWC->getLParenLoc()); - writeStmtRef(NWC->getIntExpr()); + AddStmt(const_cast(NWC->getIntExpr())); return; } case OpenACCClauseKind::VectorLength: { const auto *NWC = cast(C); writeSourceLocation(NWC->getLParenLoc()); - writeStmtRef(NWC->getIntExpr()); + AddStmt(const_cast(NWC->getIntExpr())); return; } case OpenACCClauseKind::Private: { @@ -7942,15 +7942,15 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) { writeSourceLocation(AC->getLParenLoc()); writeBool(AC->hasIntExpr()); if (AC->hasIntExpr()) - writeStmtRef(AC->getIntExpr()); + AddStmt(const_cast(AC->getIntExpr())); return; } case OpenACCClauseKind::Wait: { const auto *WC = cast(C); writeSourceLocation(WC->getLParenLoc()); writeBool(WC->getDevNumExpr()); - if (const Expr *DNE = WC->getDevNumExpr()) - writeStmtRef(DNE); + if (Expr *DNE = WC->getDevNumExpr()) + AddStmt(DNE); writeSourceLocation(WC->getQueuesLoc()); writeOpenACCIntExprList(WC->getQueueIdExprs()); diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index a44852af97bea..3c586b270fbf4 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2847,6 +2847,7 @@ void ASTStmtWriter::VisitOpenACCConstructStmt(OpenACCConstructStmt *S) { Record.push_back(S->clauses().size()); 
Record.writeEnum(S->Kind); Record.AddSourceRange(S->Range); + Record.AddSourceLocation(S->DirectiveLoc); Record.writeOpenACCClauseList(S->clauses()); } diff --git a/clang/test/C/C99/block-scopes.c b/clang/test/C/C99/block-scopes.c index 589047df3e52b..116e5d922593e 100644 --- a/clang/test/C/C99/block-scopes.c +++ b/clang/test/C/C99/block-scopes.c @@ -18,8 +18,9 @@ enum {a, b}; void different(void) { - if (sizeof(enum {b, a}) != sizeof(int)) + if (sizeof(enum {b, a}) != sizeof(int)) { _Static_assert(a == 1, ""); + } /* In C89, the 'b' found here would have been from the enum declaration in * the controlling expression of the selection statement, not from the global * declaration. In C99 and later, that enumeration is scoped to the 'if' diff --git a/clang/test/CodeGen/X86/x86-atomic-double.c b/clang/test/CodeGen/X86/x86-atomic-double.c new file mode 100644 index 0000000000000..2354c89cc2b17 --- /dev/null +++ b/clang/test/CodeGen/X86/x86-atomic-double.c @@ -0,0 +1,104 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu core2 %s -emit-llvm -o - | FileCheck -check-prefixes=X64 %s +// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu core2 %s -emit-llvm -o - | FileCheck -check-prefixes=X86 %s + + +// X64-LABEL: define dso_local double @test_double_post_inc( +// X64-SAME: ) #[[ATTR0:[0-9]+]] { +// X64-NEXT: entry: +// X64-NEXT: [[RETVAL:%.*]] = alloca double, align 8 +// X64-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, float 1.000000e+00 seq_cst, align 8 +// X64-NEXT: store float [[TMP0]], ptr [[RETVAL]], align 8 +// X64-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL]], align 8 +// X64-NEXT: ret double [[TMP1]] +// +// X86-LABEL: define dso_local double @test_double_post_inc( +// X86-SAME: ) #[[ATTR0:[0-9]+]] { +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca double, align 4 +// X86-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr 
@test_double_post_inc.n, float 1.000000e+00 seq_cst, align 8 +// X86-NEXT: store float [[TMP0]], ptr [[RETVAL]], align 4 +// X86-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL]], align 4 +// X86-NEXT: ret double [[TMP1]] +// +double test_double_post_inc() +{ + static _Atomic double n; + return n++; +} + +// X64-LABEL: define dso_local double @test_double_post_dc( +// X64-SAME: ) #[[ATTR0]] { +// X64-NEXT: entry: +// X64-NEXT: [[RETVAL:%.*]] = alloca double, align 8 +// X64-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_post_dc.n, float 1.000000e+00 seq_cst, align 8 +// X64-NEXT: store float [[TMP0]], ptr [[RETVAL]], align 8 +// X64-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL]], align 8 +// X64-NEXT: ret double [[TMP1]] +// +// X86-LABEL: define dso_local double @test_double_post_dc( +// X86-SAME: ) #[[ATTR0]] { +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca double, align 4 +// X86-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_post_dc.n, float 1.000000e+00 seq_cst, align 8 +// X86-NEXT: store float [[TMP0]], ptr [[RETVAL]], align 4 +// X86-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL]], align 4 +// X86-NEXT: ret double [[TMP1]] +// +double test_double_post_dc() +{ + static _Atomic double n; + return n--; +} + +// X64-LABEL: define dso_local double @test_double_pre_dc( +// X64-SAME: ) #[[ATTR0]] { +// X64-NEXT: entry: +// X64-NEXT: [[RETVAL:%.*]] = alloca double, align 8 +// X64-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_pre_dc.n, float 1.000000e+00 seq_cst, align 8 +// X64-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 +// X64-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 8 +// X64-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL]], align 8 +// X64-NEXT: ret double [[TMP2]] +// +// X86-LABEL: define dso_local double @test_double_pre_dc( +// X86-SAME: ) #[[ATTR0]] { +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca double, align 4 +// X86-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_pre_dc.n, float 
1.000000e+00 seq_cst, align 8 +// X86-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 +// X86-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 4 +// X86-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL]], align 4 +// X86-NEXT: ret double [[TMP2]] +// +double test_double_pre_dc() +{ + static _Atomic double n; + return --n; +} + +// X64-LABEL: define dso_local double @test_double_pre_inc( +// X64-SAME: ) #[[ATTR0]] { +// X64-NEXT: entry: +// X64-NEXT: [[RETVAL:%.*]] = alloca double, align 8 +// X64-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @test_double_pre_inc.n, float 1.000000e+00 seq_cst, align 8 +// X64-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00 +// X64-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 8 +// X64-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL]], align 8 +// X64-NEXT: ret double [[TMP2]] +// +// X86-LABEL: define dso_local double @test_double_pre_inc( +// X86-SAME: ) #[[ATTR0]] { +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca double, align 4 +// X86-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @test_double_pre_inc.n, float 1.000000e+00 seq_cst, align 8 +// X86-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00 +// X86-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 4 +// X86-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL]], align 4 +// X86-NEXT: ret double [[TMP2]] +// +double test_double_pre_inc() +{ + static _Atomic double n; + return ++n; +} diff --git a/clang/test/CodeGen/X86/x86-atomic-float.c b/clang/test/CodeGen/X86/x86-atomic-float.c index 2d3c72d2a0299..6ee441c2dd7a8 100644 --- a/clang/test/CodeGen/X86/x86-atomic-float.c +++ b/clang/test/CodeGen/X86/x86-atomic-float.c @@ -1,11 +1,11 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 -// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu core2 %s -emit-llvm -o - | FileCheck -check-prefixes=CHECK,CHECK64 %s -// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu core2 %s -emit-llvm -o - | FileCheck 
-check-prefixes=CHECK,CHECK32 %s +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu core2 %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu core2 %s -emit-llvm -o - | FileCheck %s // CHECK-LABEL: define dso_local i32 @test_int_inc( // CHECK-SAME: ) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = atomicrmw add ptr @test_int_inc.n, i32 1 seq_cst, align 4 // CHECK-NEXT: ret i32 [[TMP0]] // @@ -17,7 +17,7 @@ int test_int_inc() // CHECK-LABEL: define dso_local float @test_float_post_inc( // CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @test_float_post_inc.n, float 1.000000e+00 seq_cst, align 4 // CHECK-NEXT: ret float [[TMP0]] // @@ -29,7 +29,7 @@ float test_float_post_inc() // CHECK-LABEL: define dso_local float @test_float_post_dc( // CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr @test_float_post_dc.n, float 1.000000e+00 seq_cst, align 4 // CHECK-NEXT: ret float [[TMP0]] // @@ -41,7 +41,7 @@ float test_float_post_dc() // CHECK-LABEL: define dso_local float @test_float_pre_dc( // CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr @test_float_pre_dc.n, float 1.000000e+00 seq_cst, align 4 // CHECK-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 // CHECK-NEXT: ret float [[TMP1]] @@ -54,7 +54,7 @@ float test_float_pre_dc() // CHECK-LABEL: define dso_local float @test_float_pre_inc( // CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: entry: +// CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @test_float_pre_inc.n, float 1.000000e+00 seq_cst, align 4 // CHECK-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 
1.000000e+00 // CHECK-NEXT: ret float [[TMP1]] @@ -64,6 +64,3 @@ float test_float_pre_inc() static _Atomic float n; return ++n; } -//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -// CHECK32: {{.*}} -// CHECK64: {{.*}} diff --git a/clang/test/CodeGen/X86/x86-atomic-long_double.c b/clang/test/CodeGen/X86/x86-atomic-long_double.c index 74a22d5db151e..2c3f381f13511 100644 --- a/clang/test/CodeGen/X86/x86-atomic-long_double.c +++ b/clang/test/CodeGen/X86/x86-atomic-long_double.c @@ -1,170 +1,171 @@ -// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu core2 %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu core2 %s -emit-llvm -o - | FileCheck -check-prefix=CHECK32 %s +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu core2 %s -emit-llvm -o - | FileCheck --check-prefixes=X64 %s +// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu core2 %s -emit-llvm -o - | FileCheck --check-prefixes=X86 %s -// CHECK-LABEL: define dso_local x86_fp80 @testinc( -// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16 -// CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00 -// CHECK-NEXT: store float [[TMP2]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret x86_fp80 [[TMP3]] +// X64-LABEL: define dso_local x86_fp80 @testinc( +// X64-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] { +// X64-NEXT: [[ENTRY:.*:]] +// X64-NEXT: [[RETVAL:%.*]] = alloca 
x86_fp80, align 16 +// X64-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 +// X64-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16 +// X64-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00 +// X64-NEXT: store float [[TMP2]], ptr [[RETVAL]], align 16 +// X64-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16 +// X64-NEXT: ret x86_fp80 [[TMP3]] // -// CHECK32-LABEL: define dso_local x86_fp80 @testinc( -// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK32-NEXT: entry: -// CHECK32-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 4 -// CHECK32-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK32-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4 -// CHECK32-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00 -// CHECK32-NEXT: store float [[TMP2]], ptr [[RETVAL]], align 4 -// CHECK32-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4 -// CHECK32-NEXT: ret x86_fp80 [[TMP3]] +// X86-LABEL: define dso_local x86_fp80 @testinc( +// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4 +// X86-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00 +// X86-NEXT: store float [[TMP2]], ptr [[RETVAL]], align 4 +// X86-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4 +// X86-NEXT: ret x86_fp80 [[TMP3]] // long double testinc(_Atomic long 
double *addr) { return ++*addr; } -// CHECK-LABEL: define dso_local x86_fp80 @testdec( -// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16 -// CHECK-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret x86_fp80 [[TMP2]] +// X64-LABEL: define dso_local x86_fp80 @testdec( +// X64-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { +// X64-NEXT: [[ENTRY:.*:]] +// X64-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 +// X64-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16 +// X64-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 16 +// X64-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16 +// X64-NEXT: ret x86_fp80 [[TMP2]] // -// CHECK32-LABEL: define dso_local x86_fp80 @testdec( -// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// CHECK32-NEXT: entry: -// CHECK32-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 4 -// CHECK32-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK32-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4 -// CHECK32-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 4 -// CHECK32-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4 -// CHECK32-NEXT: ret x86_fp80 [[TMP2]] +// X86-LABEL: define 
dso_local x86_fp80 @testdec( +// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4 +// X86-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 4 +// X86-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4 +// X86-NEXT: ret x86_fp80 [[TMP2]] // long double testdec(_Atomic long double *addr) { return (*addr)--; } -// CHECK-LABEL: define dso_local x86_fp80 @testcompassign( -// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: [[ATOMIC_TEMP5:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i128, ptr [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 16 -// CHECK-NEXT: br label [[ATOMIC_OP:%.*]] -// CHECK: atomic_op: -// CHECK-NEXT: [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[ATOMIC_OP]] ] -// CHECK-NEXT: [[SUB:%.*]] = fsub x86_fp80 [[TMP2]], 0xK4003C800000000000000 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP1]], i8 0, i64 16, i1 false) -// CHECK-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 16 -// CHECK-NEXT: 
[[TMP3:%.*]] = load i128, ptr [[ATOMIC_TEMP1]], align 16 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP2]], i8 0, i64 16, i1 false) -// CHECK-NEXT: store x86_fp80 [[SUB]], ptr [[ATOMIC_TEMP2]], align 16 -// CHECK-NEXT: [[TMP4:%.*]] = load i128, ptr [[ATOMIC_TEMP2]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[TMP0]], i128 [[TMP3]], i128 [[TMP4]] seq_cst seq_cst, align 16 -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i128, i1 } [[TMP5]], 0 -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i128, i1 } [[TMP5]], 1 -// CHECK-NEXT: store i128 [[TMP6]], ptr [[ATOMIC_TEMP3]], align 16 -// CHECK-NEXT: [[TMP8]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 16 -// CHECK-NEXT: br i1 [[TMP7]], label [[ATOMIC_CONT:%.*]], label [[ATOMIC_OP]] -// CHECK: atomic_cont: -// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: [[ATOMIC_LOAD4:%.*]] = load atomic i128, ptr [[TMP9]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[ATOMIC_LOAD4]], ptr [[ATOMIC_TEMP5]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP5]], align 16 -// CHECK-NEXT: ret x86_fp80 [[TMP10]] +// X64-LABEL: define dso_local x86_fp80 @testcompassign( +// X64-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { +// X64-NEXT: [[ENTRY:.*]]: +// X64-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 +// X64-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP5:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i128, ptr [[TMP0]] seq_cst, align 16 +// X64-NEXT: store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 16 +// X64-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], 
align 16 +// X64-NEXT: br label %[[ATOMIC_OP:.*]] +// X64: [[ATOMIC_OP]]: +// X64-NEXT: [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP8:%.*]], %[[ATOMIC_OP]] ] +// X64-NEXT: [[SUB:%.*]] = fsub x86_fp80 [[TMP2]], 0xK4003C800000000000000 +// X64-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP1]], i8 0, i64 16, i1 false) +// X64-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 16 +// X64-NEXT: [[TMP3:%.*]] = load i128, ptr [[ATOMIC_TEMP1]], align 16 +// X64-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP2]], i8 0, i64 16, i1 false) +// X64-NEXT: store x86_fp80 [[SUB]], ptr [[ATOMIC_TEMP2]], align 16 +// X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[ATOMIC_TEMP2]], align 16 +// X64-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[TMP0]], i128 [[TMP3]], i128 [[TMP4]] seq_cst seq_cst, align 16 +// X64-NEXT: [[TMP6:%.*]] = extractvalue { i128, i1 } [[TMP5]], 0 +// X64-NEXT: [[TMP7:%.*]] = extractvalue { i128, i1 } [[TMP5]], 1 +// X64-NEXT: store i128 [[TMP6]], ptr [[ATOMIC_TEMP3]], align 16 +// X64-NEXT: [[TMP8]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 16 +// X64-NEXT: br i1 [[TMP7]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]] +// X64: [[ATOMIC_CONT]]: +// X64-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: [[ATOMIC_LOAD4:%.*]] = load atomic i128, ptr [[TMP9]] seq_cst, align 16 +// X64-NEXT: store i128 [[ATOMIC_LOAD4]], ptr [[ATOMIC_TEMP5]], align 16 +// X64-NEXT: [[TMP10:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP5]], align 16 +// X64-NEXT: ret x86_fp80 [[TMP10]] // -// CHECK32-LABEL: define dso_local x86_fp80 @testcompassign( -// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// CHECK32-NEXT: entry: -// CHECK32-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK32-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4 -// CHECK32-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4 -// CHECK32-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4 -// CHECK32-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca 
x86_fp80, align 4 -// CHECK32-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5) -// CHECK32-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 4 -// CHECK32-NEXT: br label [[ATOMIC_OP:%.*]] -// CHECK32: atomic_op: -// CHECK32-NEXT: [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[ATOMIC_OP]] ] -// CHECK32-NEXT: [[SUB:%.*]] = fsub x86_fp80 [[TMP2]], 0xK4003C800000000000000 -// CHECK32-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], i8 0, i64 12, i1 false) -// CHECK32-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4 -// CHECK32-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false) -// CHECK32-NEXT: store x86_fp80 [[SUB]], ptr [[ATOMIC_TEMP2]], align 4 -// CHECK32-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5) -// CHECK32-NEXT: [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4 -// CHECK32-NEXT: br i1 [[CALL]], label [[ATOMIC_CONT:%.*]], label [[ATOMIC_OP]] -// CHECK32: atomic_cont: -// CHECK32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef 5) -// CHECK32-NEXT: [[TMP5:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 4 -// CHECK32-NEXT: ret x86_fp80 [[TMP5]] +// X86-LABEL: define dso_local x86_fp80 @testcompassign( +// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*]]: +// X86-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: 
[[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5) +// X86-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 4 +// X86-NEXT: br label %[[ATOMIC_OP:.*]] +// X86: [[ATOMIC_OP]]: +// X86-NEXT: [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP3:%.*]], %[[ATOMIC_OP]] ] +// X86-NEXT: [[SUB:%.*]] = fsub x86_fp80 [[TMP2]], 0xK4003C800000000000000 +// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], i8 0, i64 12, i1 false) +// X86-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4 +// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false) +// X86-NEXT: store x86_fp80 [[SUB]], ptr [[ATOMIC_TEMP2]], align 4 +// X86-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5) +// X86-NEXT: [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4 +// X86-NEXT: br i1 [[CALL]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]] +// X86: [[ATOMIC_CONT]]: +// X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef 5) +// X86-NEXT: [[TMP5:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 4 +// X86-NEXT: ret x86_fp80 [[TMP5]] // long double testcompassign(_Atomic long double *addr) { *addr -= 25; return *addr; } -// CHECK-LABEL: define dso_local x86_fp80 @testassign( -// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = 
alloca x86_fp80, align 16 -// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP]], i8 0, i64 16, i1 false) -// CHECK-NEXT: store x86_fp80 0xK4005E600000000000000, ptr [[ATOMIC_TEMP]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 -// CHECK-NEXT: store atomic i128 [[TMP1]], ptr [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i128, ptr [[TMP2]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP1]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 16 -// CHECK-NEXT: ret x86_fp80 [[TMP3]] +// X64-LABEL: define dso_local x86_fp80 @testassign( +// X64-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { +// X64-NEXT: [[ENTRY:.*:]] +// X64-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 +// X64-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP]], i8 0, i64 16, i1 false) +// X64-NEXT: store x86_fp80 0xK4005E600000000000000, ptr [[ATOMIC_TEMP]], align 16 +// X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 +// X64-NEXT: store atomic i128 [[TMP1]], ptr [[TMP0]] seq_cst, align 16 +// X64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i128, ptr [[TMP2]] seq_cst, align 16 +// X64-NEXT: store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP1]], align 16 +// X64-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 16 +// X64-NEXT: ret x86_fp80 
[[TMP3]] // -// CHECK32-LABEL: define dso_local x86_fp80 @testassign( -// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// CHECK32-NEXT: entry: -// CHECK32-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK32-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4 -// CHECK32-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4 -// CHECK32-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP]], i8 0, i64 12, i1 false) -// CHECK32-NEXT: store x86_fp80 0xK4005E600000000000000, ptr [[ATOMIC_TEMP]], align 4 -// CHECK32-NEXT: call void @__atomic_store(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5) -// CHECK32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 5) -// CHECK32-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4 -// CHECK32-NEXT: ret x86_fp80 [[TMP2]] +// X86-LABEL: define dso_local x86_fp80 @testassign( +// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP]], i8 0, i64 12, i1 false) +// X86-NEXT: store x86_fp80 0xK4005E600000000000000, ptr [[ATOMIC_TEMP]], align 4 +// X86-NEXT: call void @__atomic_store(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5) +// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP1]], ptr noundef 
[[ATOMIC_TEMP1]], i32 noundef 5) +// X86-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4 +// X86-NEXT: ret x86_fp80 [[TMP2]] // long double testassign(_Atomic long double *addr) { *addr = 115; @@ -172,168 +173,168 @@ long double testassign(_Atomic long double *addr) { return *addr; } -// CHECK-LABEL: define dso_local x86_fp80 @test_volatile_inc( -// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16 -// CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00 -// CHECK-NEXT: store float [[TMP2]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret x86_fp80 [[TMP3]] +// X64-LABEL: define dso_local x86_fp80 @test_volatile_inc( +// X64-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { +// X64-NEXT: [[ENTRY:.*:]] +// X64-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 +// X64-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16 +// X64-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00 +// X64-NEXT: store float [[TMP2]], ptr [[RETVAL]], align 16 +// X64-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16 +// X64-NEXT: ret x86_fp80 [[TMP3]] // -// CHECK32-LABEL: define dso_local x86_fp80 @test_volatile_inc( -// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// CHECK32-NEXT: entry: -// CHECK32-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 4 -// CHECK32-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 -// 
CHECK32-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4 -// CHECK32-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00 -// CHECK32-NEXT: store float [[TMP2]], ptr [[RETVAL]], align 4 -// CHECK32-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4 -// CHECK32-NEXT: ret x86_fp80 [[TMP3]] +// X86-LABEL: define dso_local x86_fp80 @test_volatile_inc( +// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4 +// X86-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00 +// X86-NEXT: store float [[TMP2]], ptr [[RETVAL]], align 4 +// X86-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4 +// X86-NEXT: ret x86_fp80 [[TMP3]] // long double test_volatile_inc(volatile _Atomic long double *addr) { return ++*addr; } -// CHECK-LABEL: define dso_local x86_fp80 @test_volatile_dec( -// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16 -// CHECK-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret x86_fp80 [[TMP2]] +// X64-LABEL: define dso_local x86_fp80 @test_volatile_dec( +// X64-SAME: ptr 
noundef [[ADDR:%.*]]) #[[ATTR0]] { +// X64-NEXT: [[ENTRY:.*:]] +// X64-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 +// X64-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16 +// X64-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 16 +// X64-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16 +// X64-NEXT: ret x86_fp80 [[TMP2]] // -// CHECK32-LABEL: define dso_local x86_fp80 @test_volatile_dec( -// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// CHECK32-NEXT: entry: -// CHECK32-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 4 -// CHECK32-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK32-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4 -// CHECK32-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 4 -// CHECK32-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4 -// CHECK32-NEXT: ret x86_fp80 [[TMP2]] +// X86-LABEL: define dso_local x86_fp80 @test_volatile_dec( +// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4 +// X86-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 4 +// X86-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4 +// X86-NEXT: ret x86_fp80 [[TMP2]] // long double test_volatile_dec(volatile _Atomic long double *addr) { return (*addr)--; } -// CHECK-LABEL: define 
dso_local x86_fp80 @test_volatile_compassign( -// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: [[ATOMIC_TEMP5:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic volatile i128, ptr [[TMP0]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 16 -// CHECK-NEXT: br label [[ATOMIC_OP:%.*]] -// CHECK: atomic_op: -// CHECK-NEXT: [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[ATOMIC_OP]] ] -// CHECK-NEXT: [[SUB:%.*]] = fsub x86_fp80 [[TMP2]], 0xK4003C800000000000000 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP1]], i8 0, i64 16, i1 false) -// CHECK-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = load i128, ptr [[ATOMIC_TEMP1]], align 16 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP2]], i8 0, i64 16, i1 false) -// CHECK-NEXT: store x86_fp80 [[SUB]], ptr [[ATOMIC_TEMP2]], align 16 -// CHECK-NEXT: [[TMP4:%.*]] = load i128, ptr [[ATOMIC_TEMP2]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = cmpxchg volatile ptr [[TMP0]], i128 [[TMP3]], i128 [[TMP4]] seq_cst seq_cst, align 16 -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i128, i1 } [[TMP5]], 0 -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i128, i1 } [[TMP5]], 1 -// CHECK-NEXT: store i128 [[TMP6]], ptr [[ATOMIC_TEMP3]], align 16 -// CHECK-NEXT: [[TMP8]] = load x86_fp80, ptr 
[[ATOMIC_TEMP3]], align 16 -// CHECK-NEXT: br i1 [[TMP7]], label [[ATOMIC_CONT:%.*]], label [[ATOMIC_OP]] -// CHECK: atomic_cont: -// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: [[ATOMIC_LOAD4:%.*]] = load atomic volatile i128, ptr [[TMP9]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[ATOMIC_LOAD4]], ptr [[ATOMIC_TEMP5]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP5]], align 16 -// CHECK-NEXT: ret x86_fp80 [[TMP10]] +// X64-LABEL: define dso_local x86_fp80 @test_volatile_compassign( +// X64-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { +// X64-NEXT: [[ENTRY:.*]]: +// X64-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 +// X64-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP5:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic volatile i128, ptr [[TMP0]] seq_cst, align 16 +// X64-NEXT: store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 16 +// X64-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 16 +// X64-NEXT: br label %[[ATOMIC_OP:.*]] +// X64: [[ATOMIC_OP]]: +// X64-NEXT: [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP8:%.*]], %[[ATOMIC_OP]] ] +// X64-NEXT: [[SUB:%.*]] = fsub x86_fp80 [[TMP2]], 0xK4003C800000000000000 +// X64-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP1]], i8 0, i64 16, i1 false) +// X64-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 16 +// X64-NEXT: [[TMP3:%.*]] = load i128, ptr [[ATOMIC_TEMP1]], align 16 +// X64-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP2]], i8 0, i64 16, i1 false) +// X64-NEXT: store x86_fp80 [[SUB]], ptr 
[[ATOMIC_TEMP2]], align 16 +// X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[ATOMIC_TEMP2]], align 16 +// X64-NEXT: [[TMP5:%.*]] = cmpxchg volatile ptr [[TMP0]], i128 [[TMP3]], i128 [[TMP4]] seq_cst seq_cst, align 16 +// X64-NEXT: [[TMP6:%.*]] = extractvalue { i128, i1 } [[TMP5]], 0 +// X64-NEXT: [[TMP7:%.*]] = extractvalue { i128, i1 } [[TMP5]], 1 +// X64-NEXT: store i128 [[TMP6]], ptr [[ATOMIC_TEMP3]], align 16 +// X64-NEXT: [[TMP8]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 16 +// X64-NEXT: br i1 [[TMP7]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]] +// X64: [[ATOMIC_CONT]]: +// X64-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: [[ATOMIC_LOAD4:%.*]] = load atomic volatile i128, ptr [[TMP9]] seq_cst, align 16 +// X64-NEXT: store i128 [[ATOMIC_LOAD4]], ptr [[ATOMIC_TEMP5]], align 16 +// X64-NEXT: [[TMP10:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP5]], align 16 +// X64-NEXT: ret x86_fp80 [[TMP10]] // -// CHECK32-LABEL: define dso_local x86_fp80 @test_volatile_compassign( -// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// CHECK32-NEXT: entry: -// CHECK32-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK32-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4 -// CHECK32-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4 -// CHECK32-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4 -// CHECK32-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 4 -// CHECK32-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5) -// CHECK32-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 4 -// CHECK32-NEXT: br label [[ATOMIC_OP:%.*]] -// CHECK32: atomic_op: -// CHECK32-NEXT: [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[ATOMIC_OP]] ] -// CHECK32-NEXT: [[SUB:%.*]] = fsub x86_fp80 [[TMP2]], 
0xK4003C800000000000000 -// CHECK32-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], i8 0, i64 12, i1 false) -// CHECK32-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4 -// CHECK32-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false) -// CHECK32-NEXT: store x86_fp80 [[SUB]], ptr [[ATOMIC_TEMP2]], align 4 -// CHECK32-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5) -// CHECK32-NEXT: [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4 -// CHECK32-NEXT: br i1 [[CALL]], label [[ATOMIC_CONT:%.*]], label [[ATOMIC_OP]] -// CHECK32: atomic_cont: -// CHECK32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef 5) -// CHECK32-NEXT: [[TMP5:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 4 -// CHECK32-NEXT: ret x86_fp80 [[TMP5]] +// X86-LABEL: define dso_local x86_fp80 @test_volatile_compassign( +// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*]]: +// X86-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5) +// X86-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 4 +// X86-NEXT: br label %[[ATOMIC_OP:.*]] +// X86: [[ATOMIC_OP]]: +// X86-NEXT: [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP3:%.*]], 
%[[ATOMIC_OP]] ] +// X86-NEXT: [[SUB:%.*]] = fsub x86_fp80 [[TMP2]], 0xK4003C800000000000000 +// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], i8 0, i64 12, i1 false) +// X86-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4 +// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false) +// X86-NEXT: store x86_fp80 [[SUB]], ptr [[ATOMIC_TEMP2]], align 4 +// X86-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5) +// X86-NEXT: [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4 +// X86-NEXT: br i1 [[CALL]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]] +// X86: [[ATOMIC_CONT]]: +// X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef 5) +// X86-NEXT: [[TMP5:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 4 +// X86-NEXT: ret x86_fp80 [[TMP5]] // long double test_volatile_compassign(volatile _Atomic long double *addr) { *addr -= 25; return *addr; } -// CHECK-LABEL: define dso_local x86_fp80 @test_volatile_assign( -// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16 -// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP]], i8 0, i64 16, i1 false) -// CHECK-NEXT: store x86_fp80 0xK4005E600000000000000, ptr [[ATOMIC_TEMP]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 -// CHECK-NEXT: store atomic volatile i128 [[TMP1]], ptr [[TMP0]] seq_cst, 
align 16 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 -// CHECK-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic volatile i128, ptr [[TMP2]] seq_cst, align 16 -// CHECK-NEXT: store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP1]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 16 -// CHECK-NEXT: ret x86_fp80 [[TMP3]] +// X64-LABEL: define dso_local x86_fp80 @test_volatile_assign( +// X64-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { +// X64-NEXT: [[ENTRY:.*:]] +// X64-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 +// X64-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP]], i8 0, i64 16, i1 false) +// X64-NEXT: store x86_fp80 0xK4005E600000000000000, ptr [[ATOMIC_TEMP]], align 16 +// X64-NEXT: [[TMP1:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 +// X64-NEXT: store atomic volatile i128 [[TMP1]], ptr [[TMP0]] seq_cst, align 16 +// X64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 +// X64-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic volatile i128, ptr [[TMP2]] seq_cst, align 16 +// X64-NEXT: store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP1]], align 16 +// X64-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 16 +// X64-NEXT: ret x86_fp80 [[TMP3]] // -// CHECK32-LABEL: define dso_local x86_fp80 @test_volatile_assign( -// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// CHECK32-NEXT: entry: -// CHECK32-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK32-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4 -// CHECK32-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4 -// CHECK32-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: call void 
@llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP]], i8 0, i64 12, i1 false) -// CHECK32-NEXT: store x86_fp80 0xK4005E600000000000000, ptr [[ATOMIC_TEMP]], align 4 -// CHECK32-NEXT: call void @__atomic_store(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5) -// CHECK32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 -// CHECK32-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 5) -// CHECK32-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4 -// CHECK32-NEXT: ret x86_fp80 [[TMP2]] +// X86-LABEL: define dso_local x86_fp80 @test_volatile_assign( +// X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP]], i8 0, i64 12, i1 false) +// X86-NEXT: store x86_fp80 0xK4005E600000000000000, ptr [[ATOMIC_TEMP]], align 4 +// X86-NEXT: call void @__atomic_store(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5) +// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 +// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 5) +// X86-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4 +// X86-NEXT: ret x86_fp80 [[TMP2]] // long double test_volatile_assign(volatile _Atomic long double *addr) { *addr = 115; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c index 57ea4d2a1ac47..21a8229bbf244 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c +++ 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c @@ -1,14 +1,14 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 \ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 \ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 \ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 \ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 \ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 \ // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include @@ -745,3 +745,67 @@ svfloat32x4_t test_svclamp_single_f32_x4(svfloat32x4_t op1, svfloat32_t op2, svf svfloat64x4_t test_svclamp_single_f64_x4(svfloat64x4_t op1, svfloat64_t op2, svfloat64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, 
_single_f64_x4, , )(op1, op2, op3); } + +// CHECK-LABEL: @test_svclamp_single_bf16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[OP1:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[OP1]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z27test_svclamp_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[OP1:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[OP1]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svbfloat16x2_t test_svclamp_single_bf16_x2(svbfloat16x2_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming { + return SVE_ACLE_FUNC(svclamp, _single_bf16_x2, , )(op1, op2, op3); +} + +// CHECK-LABEL: @test_svclamp_single_bf16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv8bf16.nxv32bf16( [[OP1:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[OP1]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[OP1]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[OP1]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP8]], [[TMP9]], i64 16) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 24) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z27test_svclamp_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[OP1:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[OP1]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[OP1]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[OP1]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = 
extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP8]], [[TMP9]], i64 16) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svbfloat16x4_t test_svclamp_single_bf16_x4(svbfloat16x4_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming { + return SVE_ACLE_FUNC(svclamp, _single_bf16_x4, , )(op1, op2, op3); +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c index 4a5ee7e021f74..e26499d3a63cc 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c @@ -497,3 +497,25 @@ svuint8_t test_qcvt_u8_s32_x4(svint32x4_t zn) __arm_streaming { svuint16_t test_qcvt_u16_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqcvt_u16,_s64_x4,,)(zn); } + +// CHECK-LABEL: @test_cvt_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32( [[ZN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: 
@_Z15test_cvt_f32_x2u13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32( [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +__attribute__((target("sme-f16f16"))) svfloat32x2_t test_cvt_f32_x2(svfloat16_t zn) __arm_streaming { + return SVE_ACLE_FUNC(svcvt_f32,_f16_x2,,)(zn); +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c new file mode 100644 index 0000000000000..453dd3db6adf0 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c @@ -0,0 +1,40 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f16f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f16f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f16f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f16f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s 
| opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f16f16 -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +// CHECK-LABEL: @test_cvtl_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtl.widen.x2.nxv4f32( [[ZN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z16test_cvtl_f32_x2u13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtl.widen.x2.nxv4f32( [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svfloat32x2_t test_cvtl_f32_x2(svfloat16_t zn) __arm_streaming { + return SVE_ACLE_FUNC(svcvtl_f32,_f16_x2,,)(zn); +} diff --git a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c new file mode 100644 index 0000000000000..2ad2044c267ed --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c @@ -0,0 +1,139 
@@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include <arm_sme.h> + +#define SVE_ACLE_FUNC(A1,A2) A1##A2 + +// CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x2( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 [[SLICE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg1x2j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svzero_za64_vg1x2(uint32_t slice) __arm_streaming __arm_inout("za") +{ + SVE_ACLE_FUNC(svzero_za64,_vg1x2)(slice); +} + +// CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x4( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg1x4(i32 [[SLICE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg1x4j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg1x4(i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svzero_za64_vg1x4(uint32_t slice) __arm_streaming 
__arm_inout("za"){ + SVE_ACLE_FUNC(svzero_za64,_vg1x4)(slice); +} + +// CHECK-LABEL: define dso_local void @test_svzero_za64_vg2x1( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg2x1(i32 [[SLICE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg2x1j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg2x1(i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svzero_za64_vg2x1(uint32_t slice) __arm_streaming __arm_inout("za"){ + SVE_ACLE_FUNC(svzero_za64,_vg2x1)(slice); +} + +// CHECK-LABEL: define dso_local void @test_svzero_za64_vg2x2( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg2x2(i32 [[SLICE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg2x2j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg2x2(i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svzero_za64_vg2x2(uint32_t slice) __arm_streaming __arm_inout("za"){ + SVE_ACLE_FUNC(svzero_za64,_vg2x2)(slice); +} + +// CHECK-LABEL: define dso_local void @test_svzero_za64_vg2x4( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg2x4(i32 [[SLICE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg2x4j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg2x4(i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svzero_za64_vg2x4(uint32_t slice) __arm_streaming __arm_inout("za"){ + 
SVE_ACLE_FUNC(svzero_za64,_vg2x4)(slice); +} + +// CHECK-LABEL: define dso_local void @test_svzero_za64_vg4x1( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg4x1(i32 [[SLICE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg4x1j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg4x1(i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svzero_za64_vg4x1(uint32_t slice) __arm_streaming __arm_inout("za"){ + SVE_ACLE_FUNC(svzero_za64,_vg4x1)(slice); +} + +// CHECK-LABEL: define dso_local void @test_svzero_za64_vg4x2( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg4x2(i32 [[SLICE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg4x2j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg4x2(i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svzero_za64_vg4x2(uint32_t slice) __arm_streaming __arm_inout("za"){ + SVE_ACLE_FUNC(svzero_za64,_vg4x2)(slice); +} + +// CHECK-LABEL: define dso_local void @test_svzero_za64_vg4x4( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg4x4(i32 [[SLICE]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg4x4j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg4x4(i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret void +// +void test_svzero_za64_vg4x4(uint32_t slice) __arm_streaming __arm_inout("za"){ + 
SVE_ACLE_FUNC(svzero_za64,_vg4x4)(slice); +} diff --git a/clang/test/CodeGen/attr-noinline.cpp b/clang/test/CodeGen/attr-noinline.cpp index f0588cfecf463..c1fb9941b5251 100644 --- a/clang/test/CodeGen/attr-noinline.cpp +++ b/clang/test/CodeGen/attr-noinline.cpp @@ -9,6 +9,7 @@ static int baz(int x) { } [[clang::noinline]] bool noi() { } +[[msvc::noinline]] bool ms_noi() { return true; } void foo(int i) { [[clang::noinline]] bar(); @@ -39,6 +40,31 @@ void foo(int i) { // CHECK: call noundef zeroext i1 @_Z3barv() } +void ms_noi_check(int i) { + [[msvc::noinline]] bar(); +// CHECK: call noundef zeroext i1 @_Z3barv() #[[NOINLINEATTR:[0-9]+]] + [[msvc::noinline]] i = baz(i); +// CHECK: call noundef i32 @_ZL3bazi({{.*}}) #[[NOINLINEATTR]] + [[msvc::noinline]] (i = 4, bar()); +// CHECK: call noundef zeroext i1 @_Z3barv() #[[NOINLINEATTR]] + [[msvc::noinline]] (void)(bar()); +// CHECK: call noundef zeroext i1 @_Z3barv() #[[NOINLINEATTR]] + [[msvc::noinline]] f(bar(), bar()); +// CHECK: call noundef zeroext i1 @_Z3barv() #[[NOINLINEATTR]] +// CHECK: call noundef zeroext i1 @_Z3barv() #[[NOINLINEATTR]] +// CHECK: call void @_Z1fbb({{.*}}) #[[NOINLINEATTR]] + [[msvc::noinline]] [] { bar(); bar(); }(); // noinline only applies to the anonymous function call +// CHECK: call void @"_ZZ12ms_noi_checkiENK3$_0clEv"(ptr {{[^,]*}} %ref.tmp) #[[NOINLINEATTR]] + [[msvc::noinline]] for (bar(); bar(); bar()) {} +// CHECK: call noundef zeroext i1 @_Z3barv() #[[NOINLINEATTR]] +// CHECK: call noundef zeroext i1 @_Z3barv() #[[NOINLINEATTR]] +// CHECK: call noundef zeroext i1 @_Z3barv() #[[NOINLINEATTR]] + [[msvc::noinline]] ms_noi(); +// CHECK: call noundef zeroext i1 @_Z6ms_noiv() + ms_noi(); +// CHECK: call noundef zeroext i1 @_Z6ms_noiv() +} + struct S { friend bool operator==(const S &LHS, const S &RHS); }; @@ -50,6 +76,12 @@ void func(const S &s1, const S &s2) { bool b; [[clang::noinline]] b = s1 == s2; // CHECK: call noundef zeroext i1 @_ZeqRK1SS1_({{.*}}) #[[NOINLINEATTR]] + + 
[[msvc::noinline]]g(s1 == s2); +// CHECK: call noundef zeroext i1 @_ZeqRK1SS1_({{.*}}) #[[NOINLINEATTR]] +// CHECK: call void @_Z1gb({{.*}}) #[[NOINLINEATTR]] + [[msvc::noinline]] b = s1 == s2; +// CHECK: call noundef zeroext i1 @_ZeqRK1SS1_({{.*}}) #[[NOINLINEATTR]] } // CHECK: attributes #[[NOINLINEATTR]] = { noinline } diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index 93a6ab06081c9..d6ee4f68700dc 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -825,6 +825,30 @@ float extract_lane_f16x8(f16x8 a, int i) { // WEBASSEMBLY-NEXT: ret float %0 return __builtin_wasm_extract_lane_f16x8(a, i); } + +f16x8 min_f16x8(f16x8 a, f16x8 b) { + // WEBASSEMBLY: %0 = tail call <8 x half> @llvm.minimum.v8f16(<8 x half> %a, <8 x half> %b) + // WEBASSEMBLY-NEXT: ret <8 x half> %0 + return __builtin_wasm_min_f16x8(a, b); +} + +f16x8 max_f16x8(f16x8 a, f16x8 b) { + // WEBASSEMBLY: %0 = tail call <8 x half> @llvm.maximum.v8f16(<8 x half> %a, <8 x half> %b) + // WEBASSEMBLY-NEXT: ret <8 x half> %0 + return __builtin_wasm_max_f16x8(a, b); +} + +f16x8 pmin_f16x8(f16x8 a, f16x8 b) { + // WEBASSEMBLY: %0 = tail call <8 x half> @llvm.wasm.pmin.v8f16(<8 x half> %a, <8 x half> %b) + // WEBASSEMBLY-NEXT: ret <8 x half> %0 + return __builtin_wasm_pmin_f16x8(a, b); +} + +f16x8 pmax_f16x8(f16x8 a, f16x8 b) { + // WEBASSEMBLY: %0 = tail call <8 x half> @llvm.wasm.pmax.v8f16(<8 x half> %a, <8 x half> %b) + // WEBASSEMBLY-NEXT: ret <8 x half> %0 + return __builtin_wasm_pmax_f16x8(a, b); +} __externref_t externref_null() { return __builtin_wasm_ref_null_extern(); // WEBASSEMBLY: tail call ptr addrspace(10) @llvm.wasm.ref.null.extern() diff --git a/clang/test/CodeGenCXX/no-unique-address.cpp b/clang/test/CodeGenCXX/no-unique-address.cpp index 7b4bbbf2a05d5..82532c5e1be82 100644 --- a/clang/test/CodeGenCXX/no-unique-address.cpp +++ b/clang/test/CodeGenCXX/no-unique-address.cpp @@ -101,3 +101,28 @@ struct 
HasZeroSizedFieldWithNonTrivialInit { HasZeroSizedFieldWithNonTrivialInit testHasZeroSizedFieldWithNonTrivialInit = {.a = 1}; // CHECK-LABEL: define {{.*}}cxx_global_var_init // CHECK: call {{.*}}@_ZN14NonTrivialInitC1Ev({{.*}}@testHasZeroSizedFieldWithNonTrivialInit + +void *operator new(unsigned long, void *); +template +struct _box { + [[no_unique_address]] Ty _value; +}; +// Make sure this doesn't crash. +// CHECK-LABEL: define {{.*}}placement_new_struct +void placement_new_struct() { + struct set_value_t {}; + + // GH88077 + struct _tuple : _box, _box {}; + + int _storage[1]; + new (_storage) _tuple{}; + + // GH89547 + struct _tuple2 { + _box a; + }; + + int _storage2[1]; + new (_storage2) _tuple2{}; +} diff --git a/clang/test/CodeGenCXX/partitions.cpp b/clang/test/CodeGenCXX/partitions.cpp index d283dd071f6b2..e80e68f82974b 100644 --- a/clang/test/CodeGenCXX/partitions.cpp +++ b/clang/test/CodeGenCXX/partitions.cpp @@ -40,12 +40,12 @@ export int use() { } // FIXME: The definition of the variables shouldn't be exported too. 
-// CHECK: @_ZW3mod1a = available_externally global -// CHECK: @_ZW3mod1b = available_externally global +// CHECK: @_ZW3mod1a = external global +// CHECK: @_ZW3mod1b = external global // CHECK: declare{{.*}} i32 @_ZW3mod3foov // CHECK: declare{{.*}} i32 @_ZW3mod3barv -// CHECK-OPT: @_ZW3mod1a = available_externally global -// CHECK-OPT: @_ZW3mod1b = available_externally global +// CHECK-OPT: @_ZW3mod1a = external global +// CHECK-OPT: @_ZW3mod1b = external global // CHECK-OPT: declare{{.*}} i32 @_ZW3mod3foov // CHECK-OPT: declare{{.*}} i32 @_ZW3mod3barv diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c index 1d5f001c23fcc..3022ed1250d59 100644 --- a/clang/test/Driver/x86-target-features.c +++ b/clang/test/Driver/x86-target-features.c @@ -423,8 +423,8 @@ // RUN: %clang -target x86_64-unknown-linux-gnu -mno-apxf -mapxf %s -### -o %t.o 2>&1 | FileCheck -check-prefix=APXF %s // RUN: %clang -target x86_64-unknown-linux-gnu -mapxf -mno-apxf %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-APXF %s // -// APXF: "-target-feature" "+egpr" "-target-feature" "+push2pop2" "-target-feature" "+ppx" "-target-feature" "+ndd" -// NO-APXF: "-target-feature" "-egpr" "-target-feature" "-push2pop2" "-target-feature" "-ppx" "-target-feature" "-ndd" +// APXF: "-target-feature" "+egpr" "-target-feature" "+push2pop2" "-target-feature" "+ppx" "-target-feature" "+ndd" "-target-feature" "+ccmp" "-target-feature" "+nf" +// NO-APXF: "-target-feature" "-egpr" "-target-feature" "-push2pop2" "-target-feature" "-ppx" "-target-feature" "-ndd" "-target-feature" "-ccmp" "-target-feature" "-nf" // RUN: %clang -target x86_64-unknown-linux-gnu -mapx-features=egpr %s -### -o %t.o 2>&1 | FileCheck -check-prefix=EGPR %s // RUN: %clang -target x86_64-unknown-linux-gnu -mapx-features=push2pop2 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=PUSH2POP2 %s diff --git a/clang/test/ExtractAPI/anonymous_record_no_typedef.c 
b/clang/test/ExtractAPI/anonymous_record_no_typedef.c index 71e460afb1283..789316ca8930b 100644 --- a/clang/test/ExtractAPI/anonymous_record_no_typedef.c +++ b/clang/test/ExtractAPI/anonymous_record_no_typedef.c @@ -84,21 +84,15 @@ struct Vehicle { // TYPE: "text": "The type of vehicle." // TYPE: "title": "type" - // BICYCLE: "!testRelLabel": "memberOf $ c:@S@Vehicle@E@anonymous_record_no_typedef.c@{{[0-9]+}}@Bicycle $ c:@S@Vehicle@FI@type" // BICYCLE-LABEL: "!testLabel": "c:@S@Vehicle@E@anonymous_record_no_typedef.c@{{[0-9]+}}@Bicycle" // BICYCLE: "title": "Bicycle" // BICYCLE: "pathComponents": [ - // BICYCLE-NEXT: "Vehicle", - // BICYCLE-NEXT: "type", // BICYCLE-NEXT: "Bicycle" // BICYCLE-NEXT: ] - // CAR: "!testRelLabel": "memberOf $ c:@S@Vehicle@E@anonymous_record_no_typedef.c@{{[0-9]+}}@Car $ c:@S@Vehicle@FI@type" // CAR-LABEL: "!testLabel": "c:@S@Vehicle@E@anonymous_record_no_typedef.c@{{[0-9]+}}@Car" // CAR: "title": "Car" // CAR: "pathComponents": [ - // CAR-NEXT: "Vehicle", - // CAR-NEXT: "type", // CAR-NEXT: "Car" // CAR-NEXT: ] @@ -151,32 +145,22 @@ struct Vehicle { // NAME-NEXT: ] }; -// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix GLOBALENUM +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix GLOBALCASE +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix GLOBALOTHERCASE enum { GlobalCase, GlobalOtherCase }; -// GLOBALENUM-DAG: "!testRelLabel": "memberOf $ c:@Ea@GlobalCase@GlobalCase $ c:@Ea@GlobalCase" -// GLOBALENUM-DAG: "!testRelLabel": "memberOf $ c:@Ea@GlobalCase@GlobalOtherCase $ c:@Ea@GlobalCase" -// GLOBALENUM-LABEL: "!testLabel": "c:@Ea@GlobalCase" -// GLOBALENUM: "declarationFragments": [ -// GLOBALENUM-NEXT: { -// GLOBALENUM-NEXT: "kind": "keyword", -// GLOBALENUM-NEXT: "spelling": "enum" -// GLOBALENUM-NEXT: }, -// GLOBALENUM-NEXT: { -// GLOBALENUM-NEXT: "kind": "text", -// GLOBALENUM-NEXT: "spelling": " : " -// GLOBALENUM-NEXT: }, -// GLOBALENUM-NEXT: { -// GLOBALENUM-NEXT: 
"kind": "typeIdentifier", -// GLOBALENUM-NEXT: "preciseIdentifier": "c:i", -// GLOBALENUM-NEXT: "spelling": "unsigned int" -// GLOBALENUM-NEXT: }, -// GLOBALENUM-NEXT: { -// GLOBALENUM-NEXT: "kind": "text", -// GLOBALENUM-NEXT: "spelling": " { ... };" -// GLOBALENUM-NEXT: } -// GLOBALENUM-NEXT: ] +// GLOBALCASE-LABEL: "!testLabel": "c:@Ea@GlobalCase@GlobalCase" +// GLOBALCASE: "title": "GlobalCase" +// GLOBALCASE: "pathComponents": [ +// GLOBALCASE-NEXT: "GlobalCase" +// GLOBALCASE-NEXT: ] + +// GLOBALOTHERCASE-LABEL: "!testLabel": "c:@Ea@GlobalCase@GlobalOtherCase" +// GLOBALOTHERCASE: "title": "GlobalOtherCase" +// GLOBALOTHERCASE: "pathComponents": [ +// GLOBALOTHERCASE-NEXT: "GlobalOtherCase" +// GLOBALOTHERCASE-NEXT: ] // expected-no-diagnostics diff --git a/clang/test/ExtractAPI/enum.c b/clang/test/ExtractAPI/enum.c index 67e003834a7d5..58170aa0e1d90 100644 --- a/clang/test/ExtractAPI/enum.c +++ b/clang/test/ExtractAPI/enum.c @@ -115,18 +115,6 @@ enum { "source": "c:@E@Direction@West", "target": "c:@E@Direction", "targetFallback": "Direction" - }, - { - "kind": "memberOf", - "source": "c:@Ea@Constant@Constant", - "target": "c:@Ea@Constant", - "targetFallback": "enum (unnamed)" - }, - { - "kind": "memberOf", - "source": "c:@Ea@OtherConstant@OtherConstant", - "target": "c:@Ea@OtherConstant", - "targetFallback": "enum (unnamed)" } ], "symbols": [ @@ -677,55 +665,6 @@ enum { "West" ] }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "enum" - }, - { - "kind": "text", - "spelling": " : " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:i", - "spelling": "unsigned int" - }, - { - "kind": "text", - "spelling": " { ... 
};" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:@Ea@Constant" - }, - "kind": { - "displayName": "Enumeration", - "identifier": "c.enum" - }, - "location": { - "position": { - "character": 0, - "line": 16 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "enum (unnamed)" - } - ], - "title": "enum (unnamed)" - }, - "pathComponents": [ - "enum (unnamed)" - ] - }, { "accessLevel": "public", "declarationFragments": [ @@ -765,59 +704,9 @@ enum { "title": "Constant" }, "pathComponents": [ - "enum (unnamed)", "Constant" ] }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "enum" - }, - { - "kind": "text", - "spelling": " : " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:i", - "spelling": "unsigned int" - }, - { - "kind": "text", - "spelling": " { ... };" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:@Ea@OtherConstant" - }, - "kind": { - "displayName": "Enumeration", - "identifier": "c.enum" - }, - "location": { - "position": { - "character": 0, - "line": 20 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "enum (unnamed)" - } - ], - "title": "enum (unnamed)" - }, - "pathComponents": [ - "enum (unnamed)" - ] - }, { "accessLevel": "public", "declarationFragments": [ @@ -857,7 +746,6 @@ enum { "title": "OtherConstant" }, "pathComponents": [ - "enum (unnamed)", "OtherConstant" ] } diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip index 1271868a53b86..26da82843c512 100644 --- a/clang/test/Headers/__clang_hip_math.hip +++ b/clang/test/Headers/__clang_hip_math.hip @@ -231,7 +231,7 @@ extern "C" __device__ uint64_t test___make_mantissa(const char *p) { // CHECK-LABEL: @test_abs( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef i32 @llvm.abs.i32(i32 [[X:%.*]], i1 
true) +// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i32 0, -2147483648) i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true) // CHECK-NEXT: ret i32 [[TMP0]] // extern "C" __device__ int test_abs(int x) { @@ -240,7 +240,7 @@ extern "C" __device__ int test_abs(int x) { // CHECK-LABEL: @test_labs( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) +// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) // CHECK-NEXT: ret i64 [[TMP0]] // extern "C" __device__ long test_labs(long x) { @@ -249,7 +249,7 @@ extern "C" __device__ long test_labs(long x) { // CHECK-LABEL: @test_llabs( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) +// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) // CHECK-NEXT: ret i64 [[TMP0]] // extern "C" __device__ long long test_llabs(long x) { diff --git a/clang/test/Lexer/cxx2c-raw-strings.cpp b/clang/test/Lexer/cxx2c-raw-strings.cpp new file mode 100644 index 0000000000000..569a4b8447e57 --- /dev/null +++ b/clang/test/Lexer/cxx2c-raw-strings.cpp @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -Wc++26-extensions %s +// RUN: %clang_cc1 -std=c++2c -fsyntax-only -verify=cxx26 -Wpre-c++26-compat %s + +int main() { + (void) R"abc`@$(foobar)abc`@$"; + //expected-warning@-1 {{'`' in a raw string literal delimiter is a C++2c extension}} + //expected-warning@-2 {{'@' in a raw string literal delimiter is a C++2c extension}} + //expected-warning@-3 {{'$' in a raw string literal delimiter is a C++2c extension}} + //cxx26-warning@-4 {{'`' in a raw string literal delimiter is incompatible with standards before C++2c}} + //cxx26-warning@-5 {{'@' in a raw string literal delimiter is incompatible with standards before C++2c}} + //cxx26-warning@-6 {{'$' in a raw string literal 
delimiter is incompatible with standards before C++2c}} +} diff --git a/clang/test/Modules/pr93497.cppm b/clang/test/Modules/pr93497.cppm new file mode 100644 index 0000000000000..64a08e2a85e63 --- /dev/null +++ b/clang/test/Modules/pr93497.cppm @@ -0,0 +1,106 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/mod.cppm \ +// RUN: -emit-module-interface -o %t/mod.pcm +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/use.cpp \ +// RUN: -fmodule-file=mod=%t/mod.pcm -emit-llvm \ +// RUN: -o - | opt -S --passes=simplifycfg | FileCheck %t/use.cpp + +//--- mod.cppm +export module mod; + +export struct Thing { + static const Thing One; + explicit Thing(int raw) :raw(raw) { } + int raw; +}; + +const Thing Thing::One = Thing(1); + +export struct C { + int value; +}; +export const C ConstantValue = {1}; + +export const C *ConstantPtr = &ConstantValue; + +C NonConstantValue = {1}; +export const C &ConstantRef = NonConstantValue; + +export struct NonConstexprDtor { + constexpr NonConstexprDtor(int raw) : raw(raw) {} + ~NonConstexprDtor(); + + int raw; +}; + +export const NonConstexprDtor NonConstexprDtorValue = {1}; + +//--- use.cpp +import mod; + +int consume(int); +int consumeC(C); + +extern "C" __attribute__((noinline)) inline int unneeded() { + return consume(43); +} + +extern "C" __attribute__((noinline)) inline int needed() { + return consume(43); +} + +int use() { + Thing t1 = Thing::One; + return consume(t1.raw); +} + +int use2() { + if (ConstantValue.value) + return consumeC(ConstantValue); + return unneeded(); +} + +int use3() { + auto Ptr = ConstantPtr; + if (Ptr->value) + return consumeC(*Ptr); + return needed(); +} + +int use4() { + auto Ref = ConstantRef; + if (Ref.value) + return consumeC(Ref); + return needed(); +} + +int use5() { + NonConstexprDtor V = NonConstexprDtorValue; + if (V.raw) + return consume(V.raw); + return needed(); +} + +// CHECK: @_ZNW3mod5Thing3OneE = 
external +// CHECK: @_ZW3mod13ConstantValue ={{.*}}available_externally{{.*}} constant +// CHECK: @_ZW3mod11ConstantPtr = external +// CHECK: @_ZW3mod16NonConstantValue = external +// CHECK: @_ZW3mod21NonConstexprDtorValue = external + +// Check that the middle end can optimize the program by the constant information. +// CHECK-NOT: @unneeded( + +// Check that the use of ConstantPtr won't get optimized incorrectly. +// CHECK-LABEL: @_Z4use3v( +// CHECK: @needed( + +// Check that the use of ConstantRef won't get optimized incorrectly. +// CHECK-LABEL: @_Z4use4v( +// CHECK: @needed( + +// Check that the use of NonConstexprDtorValue won't get optimized incorrectly. +// CHECK-LABEL: @_Z4use5v( +// CHECK: @needed( diff --git a/clang/test/Parser/decls.c b/clang/test/Parser/decls.c new file mode 100644 index 0000000000000..39ef05bf4bd99 --- /dev/null +++ b/clang/test/Parser/decls.c @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 %s -fsyntax-only -verify -pedantic + +// Test that we can parse declarations at global scope. +int v; + +void func(void) { + // Test that we can parse declarations within a compound statement. + int a; + { + int b; + } + + int z = ({ // expected-warning {{use of GNU statement expression extension}} + // Test that we can parse declarations within a GNU statement expression. + int w = 12; + w; + }); + + // Test that we diagnose declarations where a statement is required. + // See GH92775. + if (1) + int x; // expected-error {{expected expression}} + for (;;) + int c; // expected-error {{expected expression}} + + label: + int y; // expected-warning {{label followed by a declaration is a C23 extension}} + + // Test that lookup works as expected. 
+ (void)a; + (void)v; + (void)z; + (void)b; // expected-error {{use of undeclared identifier 'b'}} + (void)w; // expected-error {{use of undeclared identifier 'w'}} + (void)x; // expected-error {{use of undeclared identifier 'x'}} + (void)c; // expected-error {{use of undeclared identifier 'c'}} + (void)y; +} + diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c index 7567267be26b4..6c08b379c9386 100644 --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -754,7 +754,7 @@ // RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-features=ccmp -x c -E -dM -o - %s | FileCheck --check-prefix=CCMP %s // RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-features=nf -x c -E -dM -o - %s | FileCheck --check-prefix=NF %s // RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-features=cf -x c -E -dM -o - %s | FileCheck --check-prefix=CF %s -// RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapxf -x c -E -dM -o - %s | FileCheck --check-prefixes=EGPR,PUSH2POP2,PPX,NDD,APXF %s +// RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapxf -x c -E -dM -o - %s | FileCheck --check-prefixes=EGPR,PUSH2POP2,PPX,NDD,CCMP,NF,APXF %s // APXF: #define __APX_F__ 1 // CCMP: #define __CCMP__ 1 // CF: #define __CF__ 1 diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_b16b16.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_b16b16.cpp new file mode 100644 index 0000000000000..62a1f8e6de1d7 --- /dev/null +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_b16b16.cpp @@ -0,0 +1,13 @@ +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -emit-llvm-only -verify -verify-ignore-unexpected=error,note -o - %s + +#include + +void test_b16b16( svbfloat16_t bf16, svbfloat16x2_t bf16x2, svbfloat16x4_t bf16x4) __arm_streaming +{ + // expected-error@+1 
{{'svclamp_single_bf16_x2' needs target feature sme2,b16b16}} + svclamp_single_bf16_x2(bf16x2, bf16, bf16); + // expected-error@+1 {{'svclamp_single_bf16_x4' needs target feature sme2,b16b16}} + svclamp_single_bf16_x4(bf16x4, bf16, bf16); +} \ No newline at end of file diff --git a/clang/test/Sema/attr-noinline.cpp b/clang/test/Sema/attr-noinline.cpp index bd6505b9fe98e..6da0e873af1b6 100644 --- a/clang/test/Sema/attr-noinline.cpp +++ b/clang/test/Sema/attr-noinline.cpp @@ -2,9 +2,9 @@ int bar(); -// expected-note@+1{{conflicting attribute is here}} +// expected-note@+1 2 {{conflicting attribute is here}} [[gnu::always_inline]] void always_inline_fn(void) { } -// expected-note@+1{{conflicting attribute is here}} +// expected-note@+1 2 {{conflicting attribute is here}} [[gnu::flatten]] void flatten_fn(void) { } [[gnu::noinline]] void noinline_fn(void) { } @@ -25,7 +25,21 @@ void foo() { __attribute__((noinline)) bar(); // expected-warning {{attribute is ignored on this statement as it only applies to functions; use '[[clang::noinline]]' on statements}} } +void ms_noi_check() { + [[msvc::noinline]] bar(); + [[msvc::noinline(0)]] bar(); // expected-error {{'noinline' attribute takes no arguments}} + int x; + [[msvc::noinline]] x = 0; // expected-warning {{'noinline' attribute is ignored because there exists no call expression inside the statement}} + [[msvc::noinline]] { asm("nop"); } // expected-warning {{'noinline' attribute is ignored because there exists no call expression inside the statement}} + [[msvc::noinline]] label: x = 1; // expected-warning {{'noinline' attribute only applies to functions and statements}} + + [[msvc::noinline]] always_inline_fn(); // expected-warning {{statement attribute 'noinline' has higher precedence than function attribute 'always_inline'}} + [[msvc::noinline]] flatten_fn(); // expected-warning {{statement attribute 'noinline' has higher precedence than function attribute 'flatten'}} + [[msvc::noinline]] noinline_fn(); +} + 
[[clang::noinline]] static int i = bar(); // expected-warning {{'noinline' attribute only applies to functions and statements}} +[[msvc::noinline]] static int j = bar(); // expected-warning {{'noinline' attribute only applies to functions and statements}} // This used to crash the compiler. template @@ -69,7 +83,39 @@ int variadic_baz(int x) { [[clang::noinline]] return non_dependent(x) + (dependent(x) + ...); } +template [[clang::always_inline]] +int qux(int x) { // #QUX + // expected-warning@+2{{statement attribute 'noinline' has higher precedence than function attribute 'always_inline'}} + // expected-note@#NO_DEP{{conflicting attribute is here}} + [[msvc::noinline]] non_dependent(x); + if constexpr (D>0) { + // expected-warning@+6{{statement attribute 'noinline' has higher precedence than function attribute 'always_inline'}} + // expected-note@#NO_DEP{{conflicting attribute is here}} + // expected-warning@+4 3{{statement attribute 'noinline' has higher precedence than function attribute 'always_inline'}} + // expected-note@#QUX 3{{conflicting attribute is here}} + // expected-note@#QUX_INST 3{{in instantiation}} + // expected-note@+1 3{{in instantiation}} + [[msvc::noinline]] return non_dependent(x), qux(x + 1); + } + return x; +} + +// We can't suppress if there is a variadic involved. +template +int variadic_qux(int x) { + // Diagnoses NO_DEP 2x, once during phase 1, the second during instantiation. + // Diagnoses DEP 3x, once per variadic expansion.
+ // expected-warning@+5 2{{statement attribute 'noinline' has higher precedence than function attribute 'always_inline'}} + // expected-note@#NO_DEP 2{{conflicting attribute is here}} + // expected-warning@+3 3{{statement attribute 'noinline' has higher precedence than function attribute 'always_inline'}} + // expected-note@#DEP 3{{conflicting attribute is here}} + // expected-note@#QUX_VARIADIC_INST{{in instantiation}} + [[msvc::noinline]] return non_dependent(x) + (dependent(x) + ...); +} + void use() { baz<3>(0); // #BAZ_INST variadic_baz<0, 1, 2>(0); // #VARIADIC_INST + qux<3>(0); // #QUX_INST + variadic_qux<0, 1, 2>(0); // #QUX_VARIADIC_INST } diff --git a/clang/test/SemaCXX/invalid-if-constexpr.cpp b/clang/test/SemaCXX/invalid-if-constexpr.cpp index 7643c47488f05..0007f2739cbbd 100644 --- a/clang/test/SemaCXX/invalid-if-constexpr.cpp +++ b/clang/test/SemaCXX/invalid-if-constexpr.cpp @@ -4,8 +4,7 @@ namespace GH61885 { void similar() { // expected-note {{'similar' declared here}} if constexpr (similer<>) {} // expected-error {{use of undeclared identifier 'similer'; did you mean 'similar'?}} } -void a() { if constexpr (__adl_swap<>) {}} // expected-error{{use of undeclared identifier '__adl_swap'; did you mean '__sync_swap'?}} \ - // expected-note {{'__sync_swap' declared here}} +void a() { if constexpr (__adl_swap<>) {}} // expected-error{{use of undeclared identifier '__adl_swap'; did you mean '__sync_swap'?}} int AA() { return true;} // expected-note {{'AA' declared here}} diff --git a/clang/test/SemaCXX/invalid-this-in-lambda.cpp b/clang/test/SemaCXX/invalid-this-in-lambda.cpp new file mode 100644 index 0000000000000..ae65bda025e23 --- /dev/null +++ b/clang/test/SemaCXX/invalid-this-in-lambda.cpp @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 %s + +decltype([]()->decltype(this) { }) a; // expected-error {{invalid use of 'this' outside of a non-static member function}} + diff --git a/clang/test/SemaCXX/typo-correction-builtin-func.cpp 
b/clang/test/SemaCXX/typo-correction-builtin-func.cpp new file mode 100644 index 0000000000000..8d369034d1be3 --- /dev/null +++ b/clang/test/SemaCXX/typo-correction-builtin-func.cpp @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +// Test that clang does not emit 'declared here' note for builtin functions that don't have a declaration in source. + +void t0() { + constexpr float A = __builtin_isinfinity(); // expected-error {{use of undeclared identifier '__builtin_isinfinity'; did you mean '__builtin_isfinite'?}} + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} +} diff --git a/clang/test/SemaOpenACC/parallel-loc-and-stmt.c b/clang/test/SemaOpenACC/parallel-loc-and-stmt.c index ba29f6da8ba25..bbcdd823483a5 100644 --- a/clang/test/SemaOpenACC/parallel-loc-and-stmt.c +++ b/clang/test/SemaOpenACC/parallel-loc-and-stmt.c @@ -33,9 +33,11 @@ int foo3; void func() { // FIXME: Should we disallow this on declarations, or consider this to be on - // the initialization? + // the initialization? This is currently rejected in C because + // Parser::ParseOpenACCDirectiveStmt() calls ParseStatement() and passes the + // statement context as "SubStmt" which does not allow for a declaration in C. 
#pragma acc parallel - int foo; + int foo; // expected-error {{expected expression}} #pragma acc parallel { diff --git a/clang/test/SemaTemplate/temp_arg_nontype_cxx2c.cpp b/clang/test/SemaTemplate/temp_arg_nontype_cxx2c.cpp index 9fb6b440b6b2a..e74c031eba4c1 100644 --- a/clang/test/SemaTemplate/temp_arg_nontype_cxx2c.cpp +++ b/clang/test/SemaTemplate/temp_arg_nontype_cxx2c.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -std=c++20 -Wconversion -verify %s +// RUN: %clang_cc1 -fsyntax-only -std=c++2c -Wconversion -verify %s struct Test { int a = 0; @@ -102,3 +102,24 @@ void bar() { } } + +namespace GH84052 { + +template +concept C = sizeof(T...[1]) == 1; // #C + +struct A {}; + +template auto = A{}> struct Set {}; // #Set + +template void foo() { + Set unrelated; +} + +Set sb; +Set sf; +// expected-error@-1 {{constraints not satisfied for class template 'Set'}} +// expected-note@#Set {{because 'C' evaluated to false}} +// expected-note@#C {{evaluated to false}} + +} // namespace GH84052 diff --git a/clang/test/TableGen/deferred-diag.td b/clang/test/TableGen/deferred-diag.td index c1906d4a9e45e..d7e8e694c7b3e 100644 --- a/clang/test/TableGen/deferred-diag.td +++ b/clang/test/TableGen/deferred-diag.td @@ -4,24 +4,24 @@ include "DiagnosticBase.inc" // Test usage of Deferrable and NonDeferrable in diagnostics. 
-def test_default : Error<"This error is non-deferrable by default">; +def test_default : Error<"this error is non-deferrable by default">; // CHECK-DAG: DIAG(test_default, {{.*}}SFINAE_SubstitutionFailure, false, true, true, false, 0) -def test_deferrable : Error<"This error is deferrable">, Deferrable; +def test_deferrable : Error<"this error is deferrable">, Deferrable; // CHECK-DAG: DIAG(test_deferrable, {{.*}} SFINAE_SubstitutionFailure, false, true, true, true, 0) -def test_non_deferrable : Error<"This error is non-deferrable">, NonDeferrable; +def test_non_deferrable : Error<"this error is non-deferrable">, NonDeferrable; // CHECK-DAG: DIAG(test_non_deferrable, {{.*}} SFINAE_SubstitutionFailure, false, true, true, false, 0) let Deferrable = 1 in { -def test_let : Error<"This error is deferrable by let">; +def test_let : Error<"this error is deferrable by let">; // CHECK-DAG: DIAG(test_let, {{.*}} SFINAE_SubstitutionFailure, false, true, true, true, 0) // Make sure TextSubstitution is allowed in the let Deferrable block. def textsub : TextSubstitution<"%select{text1|text2}0">; -def test_let2 : Error<"This error is deferrable by let %sub{textsub}0">; +def test_let2 : Error<"this error is deferrable by let %sub{textsub}0">; // CHECK-DAG: DIAG(test_let2, {{.*}} SFINAE_SubstitutionFailure, false, true, true, true, 0) } diff --git a/clang/test/TableGen/text-substitution.td b/clang/test/TableGen/text-substitution.td index aafdbe48c43be..b0d030aca6513 100644 --- a/clang/test/TableGen/text-substitution.td +++ b/clang/test/TableGen/text-substitution.td @@ -26,8 +26,8 @@ def sub_test_rewrite : TextSubstitution< // CHECK-SAME: Q! %q1. // CHECK-SAME: PLACEHOLDER! %0.OBJCCLASS! // CHECK-SAME: %objcclass5. OBJCINSTANCE! -// CHECK-SAME: %objcinstance4. DONE!", -def test_rewrite: Error<"%sub{sub_test_rewrite}5,4,3,2,1,0 DONE!">; +// CHECK-SAME: %objcinstance4. 
DONE", +def test_rewrite: Error<"%sub{sub_test_rewrite}5,4,3,2,1,0 DONE">; def test_sub_basic : Error<"%sub{yes_no}0">; // CHECK: test_sub_basic diff --git a/clang/test/TableGen/wording-errors.td b/clang/test/TableGen/wording-errors.td new file mode 100644 index 0000000000000..eb5eb2f547c78 --- /dev/null +++ b/clang/test/TableGen/wording-errors.td @@ -0,0 +1,55 @@ +// RUN: not clang-tblgen -gen-clang-diags-defs -I%S %s -o /dev/null 2>&1 | FileCheck %s +include "DiagnosticBase.inc" + +// Ensure we catch a capital letter at the start of a diagnostic. +def zero : Error< + "This is bad">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not start with a capital letter; 'This' is invalid + +// Test that we also correctly handle selections. +def one : Error< + "%select{|or}0 That">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not start with a capital letter; 'That' is invalid +def two : Error< + "%select{as does|}0 This">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not start with a capital letter; 'This' is invalid +def three : Error< + "%select{and||of course}0 Whatever">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not start with a capital letter; 'Whatever' is invalid + +// Test that we accept the following cases. +def four : Error< + "this is fine">; +def five : Error< + "%select{this|is|also}0 Fine">; +def six : Error< + "%select{this|is|also|}0 fine">; +def seven : Error< + "%select{ARC|C|C23|C++14|OpenMP}0 are also fine">; + +// Next, test that we catch punctuation at the end of the diagnostic. +def eight : Error< + "punctuation is bad.">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not end with punctuation; '.' is invalid +def nine : Error< + "it's really bad!">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not end with punctuation; '!' 
is invalid +def ten : Error< + "we also catch %select{punctuation.|in select}0">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not end with punctuation; '.' is invalid +def eleven : Error< + "and %select{|here.}0">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not end with punctuation; '.' is invalid +def twelve : Error< + "and %select{here.|}0">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not end with punctuation; '.' is invalid +def thirteen : Error< + "and even %select{|here.|}0">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not end with punctuation; '.' is invalid +def fourteen : Error< + "and %select{here}0.">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not end with punctuation; '.' is invalid + +// Test that we accept the following cases. +def fifteen : Error< + "question marks are intentionally okay?">; diff --git a/clang/tools/libclang/CXExtractAPI.cpp b/clang/tools/libclang/CXExtractAPI.cpp index d74f3740406c5..c35558e66fcb9 100644 --- a/clang/tools/libclang/CXExtractAPI.cpp +++ b/clang/tools/libclang/CXExtractAPI.cpp @@ -45,6 +45,9 @@ struct LibClangExtractAPIVisitor : ExtractAPIVisitor(Context, API) {} const RawComment *fetchRawCommentForDecl(const Decl *D) const { + if (const auto *Comment = Base::fetchRawCommentForDecl(D)) + return Comment; + return Context.getRawCommentForAnyRedecl(D); } diff --git a/clang/unittests/Interpreter/CodeCompletionTest.cpp b/clang/unittests/Interpreter/CodeCompletionTest.cpp index 873fbda32f057..72c02c683fafd 100644 --- a/clang/unittests/Interpreter/CodeCompletionTest.cpp +++ b/clang/unittests/Interpreter/CodeCompletionTest.cpp @@ -4,6 +4,7 @@ #include "clang/Lex/Preprocessor.h" #include "clang/Sema/CodeCompleteConsumer.h" #include "clang/Sema/Sema.h" +#include "llvm/ExecutionEngine/Orc/LLJIT.h" #include "llvm/LineEditor/LineEditor.h" #include "llvm/Support/Error.h" #include 
"llvm/Support/raw_ostream.h" @@ -11,6 +12,10 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#if defined(_AIX) || defined(__MVS__) +#define CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +#endif + using namespace clang; namespace { auto CB = clang::IncrementalCompilerBuilder(); @@ -50,7 +55,21 @@ static std::vector runComp(clang::Interpreter &MainInterp, return Comps; } +static bool HostSupportsJit() { + auto J = llvm::orc::LLJITBuilder().create(); + if (J) + return true; + LLVMConsumeError(llvm::wrap(J.takeError())); + return false; +} + +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_Sanity) { +#else TEST(CodeCompletionTest, Sanity) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail(Interp->Parse("int foo = 12;")); auto Err = llvm::Error::success(); @@ -61,7 +80,13 @@ TEST(CodeCompletionTest, Sanity) { EXPECT_EQ((bool)Err, false); } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_SanityNoneValid) { +#else TEST(CodeCompletionTest, SanityNoneValid) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail(Interp->Parse("int foo = 12;")); auto Err = llvm::Error::success(); @@ -70,7 +95,13 @@ TEST(CodeCompletionTest, SanityNoneValid) { EXPECT_EQ((bool)Err, false); } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_TwoDecls) { +#else TEST(CodeCompletionTest, TwoDecls) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail(Interp->Parse("int application = 12;")); cantFail(Interp->Parse("int apple = 12;")); @@ -80,14 +111,26 @@ TEST(CodeCompletionTest, TwoDecls) { EXPECT_EQ((bool)Err, false); } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_CompFunDeclsNoError) { +#else TEST(CodeCompletionTest, CompFunDeclsNoError) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp 
= createInterpreter(); auto Err = llvm::Error::success(); auto comps = runComp(*Interp, "void app(", Err); EXPECT_EQ((bool)Err, false); } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_TypedDirected) { +#else TEST(CodeCompletionTest, TypedDirected) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail(Interp->Parse("int application = 12;")); cantFail(Interp->Parse("char apple = '2';")); @@ -119,7 +162,13 @@ TEST(CodeCompletionTest, TypedDirected) { } } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_SanityClasses) { +#else TEST(CodeCompletionTest, SanityClasses) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail(Interp->Parse("struct Apple{};")); cantFail(Interp->Parse("void takeApple(Apple &a1){}")); @@ -142,7 +191,13 @@ TEST(CodeCompletionTest, SanityClasses) { } } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_SubClassing) { +#else TEST(CodeCompletionTest, SubClassing) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail(Interp->Parse("struct Fruit {};")); cantFail(Interp->Parse("struct Apple : Fruit{};")); @@ -157,7 +212,13 @@ TEST(CodeCompletionTest, SubClassing) { EXPECT_EQ((bool)Err, false); } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_MultipleArguments) { +#else TEST(CodeCompletionTest, MultipleArguments) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail(Interp->Parse("int foo = 42;")); cantFail(Interp->Parse("char fowl = 'A';")); @@ -169,7 +230,13 @@ TEST(CodeCompletionTest, MultipleArguments) { EXPECT_EQ((bool)Err, false); } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_Methods) { +#else TEST(CodeCompletionTest, Methods) { +#endif + if (!HostSupportsJit()) + 
GTEST_SKIP(); auto Interp = createInterpreter(); cantFail(Interp->Parse( "struct Foo{int add(int a){return 42;} int par(int b){return 42;}};")); @@ -183,7 +250,13 @@ TEST(CodeCompletionTest, Methods) { EXPECT_EQ((bool)Err, false); } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_MethodsInvocations) { +#else TEST(CodeCompletionTest, MethodsInvocations) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail(Interp->Parse( "struct Foo{int add(int a){return 42;} int par(int b){return 42;}};")); @@ -197,7 +270,13 @@ TEST(CodeCompletionTest, MethodsInvocations) { EXPECT_EQ((bool)Err, false); } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_NestedInvocations) { +#else TEST(CodeCompletionTest, NestedInvocations) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail(Interp->Parse( "struct Foo{int add(int a){return 42;} int par(int b){return 42;}};")); @@ -212,7 +291,13 @@ TEST(CodeCompletionTest, NestedInvocations) { EXPECT_EQ((bool)Err, false); } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_TemplateFunctions) { +#else TEST(CodeCompletionTest, TemplateFunctions) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail( Interp->Parse("template T id(T a) { return a;} ")); diff --git a/clang/unittests/Interpreter/IncrementalProcessingTest.cpp b/clang/unittests/Interpreter/IncrementalProcessingTest.cpp index 54159173d91e3..732753f11306e 100644 --- a/clang/unittests/Interpreter/IncrementalProcessingTest.cpp +++ b/clang/unittests/Interpreter/IncrementalProcessingTest.cpp @@ -36,14 +36,6 @@ using namespace clang; namespace { -static bool HostSupportsJit() { - auto J = llvm::orc::LLJITBuilder().create(); - if (J) - return true; - LLVMConsumeError(llvm::wrap(J.takeError())); - return false; -} - // Incremental processing produces 
several modules, all using the same "main // file". Make sure CodeGen can cope with that, e.g. for static initializers. const char TestProgram1[] = "extern \"C\" int funcForProg1() { return 17; }\n" @@ -64,11 +56,22 @@ const Function *getGlobalInit(llvm::Module *M) { return nullptr; } +static bool HostSupportsJit() { + auto J = llvm::orc::LLJITBuilder().create(); + if (J) + return true; + LLVMConsumeError(llvm::wrap(J.takeError())); + return false; +} + #ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT TEST(IncrementalProcessing, DISABLED_EmitCXXGlobalInitFunc) { #else TEST(IncrementalProcessing, EmitCXXGlobalInitFunc) { #endif + if (!HostSupportsJit()) + GTEST_SKIP(); + std::vector ClangArgv = {"-Xclang", "-emit-llvm-only"}; auto CB = clang::IncrementalCompilerBuilder(); CB.SetCompilerArgs(ClangArgv); diff --git a/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp b/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp index f564689fff7cf..b290530444d2a 100644 --- a/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp +++ b/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp @@ -1213,6 +1213,197 @@ static bool isRemark(const Record &Diag) { return ClsName == "CLASS_REMARK"; } +// Presumes the text has been split at the first whitespace or hyphen. +static bool isExemptAtStart(StringRef Text) { + // Fast path, the first character is lowercase or not alphanumeric. + if (Text.empty() || isLower(Text[0]) || !isAlnum(Text[0])) + return true; + + // If the text is all uppercase (or numbers, +, or _), then we assume it's an + // acronym and that's allowed. This covers cases like ISO, C23, C++14, and + // OBJECT_MODE. However, if there's only a single letter other than "C", we + // do not exempt it so that we catch a case like "A really bad idea" while + // still allowing a case like "C does not allow...". 
+ if (llvm::all_of(Text, [](char C) { + return isUpper(C) || isDigit(C) || C == '+' || C == '_'; + })) + return Text.size() > 1 || Text[0] == 'C'; + + // Otherwise, there are a few other exemptions. + return StringSwitch(Text) + .Case("AddressSanitizer", true) + .Case("CFString", true) + .Case("Clang", true) + .Case("Fuchsia", true) + .Case("GNUstep", true) + .Case("IBOutletCollection", true) + .Case("Microsoft", true) + .Case("Neon", true) + .StartsWith("NSInvocation", true) // NSInvocation, NSInvocation's + .Case("Objective", true) // Objective-C (hyphen is a word boundary) + .Case("OpenACC", true) + .Case("OpenCL", true) + .Case("OpenMP", true) + .Case("Pascal", true) + .Case("Swift", true) + .Case("Unicode", true) + .Case("Vulkan", true) + .Case("WebAssembly", true) + .Default(false); +} + +// Does not presume the text has been split at all. +static bool isExemptAtEnd(StringRef Text) { + // Rather than come up with a list of characters that are allowed, we go the + // other way and look only for characters that are not allowed. + switch (Text.back()) { + default: + return true; + case '?': + // Explicitly allowed to support "; did you mean?". + return true; + case '.': + case '!': + return false; + } +} + +static void verifyDiagnosticWording(const Record &Diag) { + StringRef FullDiagText = Diag.getValueAsString("Summary"); + + auto DiagnoseStart = [&](StringRef Text) { + // Verify that the text does not start with a capital letter, except for + // special cases that are exempt like ISO and C++. Find the first word + // by looking for a word breaking character. 
+ char Separators[] = {' ', '-', ',', '}'}; + auto Iter = std::find_first_of( + Text.begin(), Text.end(), std::begin(Separators), std::end(Separators)); + + StringRef First = Text.substr(0, Iter - Text.begin()); + if (!isExemptAtStart(First)) { + PrintError(&Diag, + "Diagnostics should not start with a capital letter; '" + + First + "' is invalid"); + } + }; + + auto DiagnoseEnd = [&](StringRef Text) { + // Verify that the text does not end with punctuation like '.' or '!'. + if (!isExemptAtEnd(Text)) { + PrintError(&Diag, "Diagnostics should not end with punctuation; '" + + Text.substr(Text.size() - 1, 1) + "' is invalid"); + } + }; + + // If the diagnostic starts with %select, look through it to see whether any + // of the options will cause a problem. + if (FullDiagText.starts_with("%select{")) { + // Do a balanced delimiter scan from the start of the text to find the + // closing '}', skipping intermediary {} pairs. + + size_t BraceCount = 1; + constexpr size_t PercentSelectBraceLen = sizeof("%select{") - 1; + auto Iter = FullDiagText.begin() + PercentSelectBraceLen; + for (auto End = FullDiagText.end(); Iter != End; ++Iter) { + char Ch = *Iter; + if (Ch == '{') + ++BraceCount; + else if (Ch == '}') + --BraceCount; + if (!BraceCount) + break; + } + // Defending against a malformed diagnostic string. + if (BraceCount != 0) + return; + + StringRef SelectText = + FullDiagText.substr(PercentSelectBraceLen, Iter - FullDiagText.begin() - + PercentSelectBraceLen); + SmallVector SelectPieces; + SelectText.split(SelectPieces, '|'); + + // Walk over all of the individual pieces of select text to see if any of + // them start with an invalid character. If any of the select pieces is + // empty, we need to look at the first word after the %select to see + // whether that is invalid or not. If all of the pieces are fine, then we + // don't need to check anything else about the start of the diagnostic. 
+ bool CheckSecondWord = false; + for (StringRef Piece : SelectPieces) { + if (Piece.empty()) + CheckSecondWord = true; + else + DiagnoseStart(Piece); + } + + if (CheckSecondWord) { + // There was an empty select piece, so we need to check the second + // word. This catches situations like '%select{|fine}0 Not okay'. Add + // two to account for the closing curly brace and the number after it. + StringRef AfterSelect = + FullDiagText.substr(Iter - FullDiagText.begin() + 2).ltrim(); + DiagnoseStart(AfterSelect); + } + } else { + // If the start of the diagnostic is not %select, we can check the first + // word and be done with it. + DiagnoseStart(FullDiagText); + } + + // Nothing to check at the end of an empty diagnostic. If the text ends with + // "}<digit>", see whether it closes a %select{...}0 and check each piece. + bool StillNeedToDiagEnd = !FullDiagText.empty(); + if (FullDiagText.size() >= sizeof("}0") && isDigit(FullDiagText.back()) && + *(FullDiagText.end() - 2) == '}') { + // Scan backwards to find the opening curly brace. + size_t BraceCount = 1; + auto Iter = FullDiagText.end() - sizeof("}0"); + for (auto End = FullDiagText.begin(); Iter != End; --Iter) { + char Ch = *Iter; + if (Ch == '}') + ++BraceCount; + else if (Ch == '{') + --BraceCount; + if (!BraceCount) + break; + } + // Defending against a malformed diagnostic string. + if (BraceCount != 0) + return; + + // Continue the backwards scan to find the word before the '{' to see if it + // is 'select'. + constexpr size_t SelectLen = sizeof("select") - 1; + bool IsSelect = + (FullDiagText.substr(Iter - SelectLen - FullDiagText.begin(), + SelectLen) == "select"); + if (IsSelect) { + // Gather the content between the {} for the select in question so we can + // split it into pieces. + StillNeedToDiagEnd = false; // No longer need to handle the end. 
+ StringRef SelectText = + FullDiagText.substr(Iter - FullDiagText.begin() + /*{*/ 1, + FullDiagText.end() - Iter - /*pos before }0*/ 3); + SmallVector SelectPieces; + SelectText.split(SelectPieces, '|'); + for (StringRef Piece : SelectPieces) { + // Not worrying about a situation like: "this is bar. %select{foo|}0". + if (!Piece.empty()) + DiagnoseEnd(Piece); + } + } + } + + // If we didn't already cover the diagnostic because of a %select, handle it + // now. + if (StillNeedToDiagEnd) + DiagnoseEnd(FullDiagText); + + // FIXME: This could also be improved by looking for instances of clang or + // gcc in the diagnostic and recommend Clang or GCC instead. However, this + // runs into odd situations like [[clang::warn_unused_result]], + // #pragma clang, or --unwindlib=libgcc. +} /// ClangDiagsDefsEmitter - The top-level class emits .def files containing /// declarations of Clang diagnostics. @@ -1273,6 +1464,9 @@ void clang::EmitClangDiagsDefs(RecordKeeper &Records, raw_ostream &OS, if (!Component.empty() && Component != R.getValueAsString("Component")) continue; + // Validate diagnostic wording for common issues. + verifyDiagnosticWording(R); + OS << "DIAG(" << R.getName() << ", "; OS << R.getValueAsDef("Class")->getName(); OS << ", (unsigned)diag::Severity::" diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp index a2fc27de1901b..9375e27d4f4d2 100644 --- a/compiler-rt/lib/msan/msan.cpp +++ b/compiler-rt/lib/msan/msan.cpp @@ -100,7 +100,17 @@ int msan_report_count = 0; // Array of stack origins. // FIXME: make it resizable. -static const uptr kNumStackOriginDescrs = 1024 * 1024; +// Although BSS memory doesn't cost anything until used, it is limited to 2GB +// in some configurations (e.g., "relocation R_X86_64_PC32 out of range: +// ... is not in [-2147483648, 2147483647]; references section '.bss'"). +// We use kNumStackOriginDescrs * (sizeof(char*) + sizeof(uptr)) == 64MB. 
+#if SANITIZER_PPC +// soft_rss_limit test (release_origin.c) fails on PPC if kNumStackOriginDescrs +// is too high. (#if, not #ifdef: the macro is defined to 0 on other targets.) +static const uptr kNumStackOriginDescrs = 1 * 1024 * 1024; +#else +static const uptr kNumStackOriginDescrs = 4 * 1024 * 1024; +#endif // SANITIZER_PPC static const char *StackOriginDescr[kNumStackOriginDescrs]; static uptr StackOriginPC[kNumStackOriginDescrs]; static atomic_uint32_t NumStackOriginDescrs; diff --git a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h index 06a44f1885656..510ff72998914 100644 --- a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h +++ b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h @@ -101,6 +101,10 @@ class ConvertFIRToLLVMPattern : public mlir::ConvertToLLVMPattern { mlir::Value box, mlir::ConversionPatternRewriter &rewriter) const; + mlir::Value getRankFromBox(mlir::Location loc, TypePair boxTy, + mlir::Value box, + mlir::ConversionPatternRewriter &rewriter) const; + // Get the element type given an LLVM type that is of the form // (array|struct|vector)+ and the provided indexes. 
mlir::Type getBoxEleTy(mlir::Type type, diff --git a/flang/include/flang/Optimizer/HLFIR/Passes.h b/flang/include/flang/Optimizer/HLFIR/Passes.h index edefe36de00c1..83388d0527e19 100644 --- a/flang/include/flang/Optimizer/HLFIR/Passes.h +++ b/flang/include/flang/Optimizer/HLFIR/Passes.h @@ -20,10 +20,6 @@ namespace hlfir { #define GEN_PASS_DECL -#include "flang/Optimizer/HLFIR/Passes.h.inc" - -std::unique_ptr createConvertHLFIRtoFIRPass(); - #define GEN_PASS_REGISTRATION #include "flang/Optimizer/HLFIR/Passes.h.inc" } // namespace hlfir diff --git a/flang/include/flang/Optimizer/HLFIR/Passes.td b/flang/include/flang/Optimizer/HLFIR/Passes.td index 1dd2e3dc81911..ed49f5093c965 100644 --- a/flang/include/flang/Optimizer/HLFIR/Passes.td +++ b/flang/include/flang/Optimizer/HLFIR/Passes.td @@ -12,7 +12,6 @@ include "mlir/Pass/PassBase.td" def ConvertHLFIRtoFIR : Pass<"convert-hlfir-to-fir", "::mlir::ModuleOp"> { let summary = "Lower High-Level FIR to FIR"; - let constructor = "hlfir::createConvertHLFIRtoFIRPass()"; let dependentDialects = [ "mlir::func::FuncDialect", ]; diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index bb3c90ebc04d4..61ea7a7f9bbdd 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -331,7 +331,7 @@ inline void createHLFIRToFIRPassPipeline( pm.addPass(hlfir::createLowerHLFIROrderedAssignments()); pm.addPass(hlfir::createLowerHLFIRIntrinsics()); pm.addPass(hlfir::createBufferizeHLFIR()); - pm.addPass(hlfir::createConvertHLFIRtoFIRPass()); + pm.addPass(hlfir::createConvertHLFIRtoFIR()); } /// Create a pass pipeline for handling certain OpenMP transformations needed diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 075d0634fd1ee..8e9c1d640c330 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -41,9 +41,15 @@ #include "flang/Optimizer/Support/Utils.h" #include 
"flang/Semantics/runtime-type-info.h" #include "flang/Semantics/tools.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include +static llvm::cl::opt allowAssumedRank( + "allow-assumed-rank", + llvm::cl::desc("Enable assumed rank lowering - experimental"), + llvm::cl::init(false)); + #define DEBUG_TYPE "flang-lower-variable" /// Helper to lower a scalar expression using a specific symbol mapping. @@ -1885,7 +1891,8 @@ void Fortran::lower::mapSymbolAttributes( return; } - if (Fortran::evaluate::IsAssumedRank(sym)) + const bool isAssumedRank = Fortran::evaluate::IsAssumedRank(sym); + if (isAssumedRank && !allowAssumedRank) TODO(loc, "assumed-rank variable in procedure implemented in Fortran"); Fortran::lower::BoxAnalyzer ba; @@ -1894,6 +1901,8 @@ void Fortran::lower::mapSymbolAttributes( // First deal with pointers and allocatables, because their handling here // is the same regardless of their rank. if (Fortran::semantics::IsAllocatableOrPointer(sym)) { + if (isAssumedRank) + TODO(loc, "assumed-rank pointer or allocatable"); // Get address of fir.box describing the entity. // global mlir::Value boxAlloc = preAlloc; @@ -1942,7 +1951,7 @@ void Fortran::lower::mapSymbolAttributes( if (mlir::Value len = lowerExplicitCharLen(converter, loc, ba, symMap, stmtCtx)) explicitParams.push_back(len); - if (sym.Rank() == 0) { + if (!isAssumedRank && sym.Rank() == 0) { // Do not keep scalar characters as fir.box (even when optional). // Lowering and FIR is not meant to deal with scalar characters as // fir.box outside of calls. @@ -1987,9 +1996,11 @@ void Fortran::lower::mapSymbolAttributes( } } // TODO: derived type length parameters. 
- lowerExplicitLowerBounds(converter, loc, ba, lbounds, symMap, stmtCtx); - lowerExplicitExtents(converter, loc, ba, lbounds, explicitExtents, symMap, - stmtCtx); + if (!isAssumedRank) { + lowerExplicitLowerBounds(converter, loc, ba, lbounds, symMap, stmtCtx); + lowerExplicitExtents(converter, loc, ba, lbounds, explicitExtents, + symMap, stmtCtx); + } genBoxDeclare(converter, symMap, sym, dummyArg, lbounds, explicitParams, explicitExtents, replace); return; @@ -2021,6 +2032,11 @@ void Fortran::lower::mapSymbolAttributes( if (isUnusedEntryDummy) { assert(!Fortran::semantics::IsAllocatableOrPointer(sym) && "handled above"); + // Need to add support for allocatable assumed-rank to use + // logic below, or to simplify it and add codegen for fir.zero + // !fir.box<> instead. + if (isAssumedRank) + TODO(loc, "assumed rank in ENTRY"); // The box is read right away because lowering code does not expect // a non pointer/allocatable symbol to be mapped to a MutableBox. mlir::Type ty = converter.genType(var); @@ -2042,6 +2058,13 @@ void Fortran::lower::mapSymbolAttributes( return false; }; + if (isAssumedRank) { + assert(isUnusedEntryDummy && "assumed rank must be pointers/allocatables " + "or descriptor dummy arguments"); + genUnusedEntryPointBox(); + return; + } + // Helper to generate scalars for the symbol properties. 
auto genValue = [&](const Fortran::lower::SomeExpr &expr) { return genScalarValue(converter, loc, expr, symMap, stmtCtx); diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index b722e19272ca1..557a9685024c5 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -86,7 +86,7 @@ void DataSharingProcessor::insertDeallocs() { if (semantics::IsAllocatable(sym->GetUltimate())) { if (!useDelayedPrivatization) { converter.createHostAssociateVarCloneDealloc(*sym); - return; + continue; } lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym); diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 74e68725003cb..664453ebaf2f7 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -391,9 +391,8 @@ struct BoxIsArrayOpConversion : public fir::FIROpConversion { mlir::Value a = adaptor.getOperands()[0]; auto loc = boxisarray.getLoc(); TypePair boxTyPair = getBoxTypePair(boxisarray.getVal().getType()); - auto rank = getValueFromBox(loc, boxTyPair, a, rewriter.getI32Type(), - rewriter, kRankPosInBox); - auto c0 = genConstantOffset(loc, rewriter, 0); + mlir::Value rank = getRankFromBox(loc, boxTyPair, a, rewriter); + mlir::Value c0 = genConstantIndex(loc, rank.getType(), rewriter, 0); rewriter.replaceOpWithNewOp( boxisarray, mlir::LLVM::ICmpPredicate::ne, rank, c0); return mlir::success(); @@ -430,8 +429,8 @@ struct BoxRankOpConversion : public fir::FIROpConversion { auto loc = boxrank.getLoc(); mlir::Type ty = convertType(boxrank.getType()); TypePair boxTyPair = getBoxTypePair(boxrank.getVal().getType()); - auto result = - getValueFromBox(loc, boxTyPair, a, ty, rewriter, kRankPosInBox); + mlir::Value rank = getRankFromBox(loc, boxTyPair, a, rewriter); + mlir::Value result = integerCast(loc, rewriter, ty, rank); rewriter.replaceOp(boxrank, result); return 
mlir::success(); } diff --git a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp index 69e78167b0733..8c726d547491a 100644 --- a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp +++ b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp @@ -179,6 +179,14 @@ mlir::Value ConvertFIRToLLVMPattern::getElementSizeFromBox( return getValueFromBox(loc, boxTy, box, resultTy, rewriter, kElemLenPosInBox); } +/// Read the rank from a fir.box; the result has the rank field's LLVM type. +mlir::Value ConvertFIRToLLVMPattern::getRankFromBox( + mlir::Location loc, TypePair boxTy, mlir::Value box, + mlir::ConversionPatternRewriter &rewriter) const { + mlir::Type resultTy = getBoxEleTy(boxTy.llvm, {kRankPosInBox}); + return getValueFromBox(loc, boxTy, box, resultTy, rewriter, kRankPosInBox); +} + // Get the element type given an LLVM type that is of the form // (array|struct|vector)+ and the provided indexes. mlir::Type ConvertFIRToLLVMPattern::getBoxEleTy( diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp index e56595d1c8e23..b48b993ddc5af 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp @@ -348,7 +348,17 @@ class DeclareOpConversion : public mlir::OpRewritePattern { // Helper to generate the hlfir fir.box with the local lower bounds and // type parameters. auto genHlfirBox = [&]() -> mlir::Value { - if (!mlir::isa(firBase.getType())) { + if (auto baseBoxType = + mlir::dyn_cast(firBase.getType())) { + // Rebox so that lower bounds are correct. 
+ if (baseBoxType.isAssumedRank()) + return builder.create( + loc, hlfirBaseType, firBase, + fir::LowerBoundModifierAttribute::SetToOnes); + return builder.create(loc, hlfirBaseType, firBase, + declareOp.getShape(), + /*slice=*/mlir::Value{}); + } else { llvm::SmallVector typeParams; auto maybeCharType = mlir::dyn_cast( fir::unwrapSequenceType(fir::unwrapPassByRefType(hlfirBaseType))); @@ -358,11 +368,6 @@ class DeclareOpConversion : public mlir::OpRewritePattern { return builder.create( loc, hlfirBaseType, firBase, declareOp.getShape(), /*slice=*/mlir::Value{}, typeParams); - } else { - // Rebox so that lower bounds are correct. - return builder.create(loc, hlfirBaseType, firBase, - declareOp.getShape(), - /*slice=*/mlir::Value{}); } }; if (!mlir::cast(declareOp.getOperation()) @@ -789,7 +794,3 @@ class ConvertHLFIRtoFIR }; } // namespace - -std::unique_ptr hlfir::createConvertHLFIRtoFIRPass() { - return std::make_unique(); -} diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90 index f34820dd10792..0224ecfdde7c6 100644 --- a/flang/module/cudadevice.f90 +++ b/flang/module/cudadevice.f90 @@ -18,34 +18,34 @@ module cudadevice ! 
Synchronization Functions interface - attributes(device) subroutine syncthreads() + attributes(device) subroutine syncthreads() bind(c, name='__syncthreads') end subroutine end interface public :: syncthreads interface - attributes(device) integer function syncthreads_and(value) + attributes(device) integer function syncthreads_and(value) bind(c, name='__syncthreads_and') integer :: value end function end interface public :: syncthreads_and interface - attributes(device) integer function syncthreads_count(value) + attributes(device) integer function syncthreads_count(value) bind(c, name='__syncthreads_count') integer :: value end function end interface public :: syncthreads_count interface - attributes(device) integer function syncthreads_or(value) + attributes(device) integer function syncthreads_or(value) bind(c, name='__syncthreads_or') integer :: value end function end interface public :: syncthreads_or interface - attributes(device) subroutine syncwarp(mask) + attributes(device) subroutine syncwarp(mask) bind(c, name='__syncwarp') integer :: mask end subroutine end interface @@ -54,19 +54,19 @@ attributes(device) subroutine syncwarp(mask) ! 
Memory Fences interface - attributes(device) subroutine threadfence() + attributes(device) subroutine threadfence() bind(c, name='__threadfence') end subroutine end interface public :: threadfence interface - attributes(device) subroutine threadfence_block() + attributes(device) subroutine threadfence_block() bind(c, name='__threadfence_block') end subroutine end interface public :: threadfence_block interface - attributes(device) subroutine threadfence_system() + attributes(device) subroutine threadfence_system() bind(c, name='__threadfence_system') end subroutine end interface public :: threadfence_system diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir index 21323a5e657c9..70cb0443e9a64 100644 --- a/flang/test/Fir/convert-to-llvm.fir +++ b/flang/test/Fir/convert-to-llvm.fir @@ -941,7 +941,8 @@ func.func @extract_rank(%arg0: !fir.box>) -> i32 { // CHECK-LABEL: llvm.func @extract_rank( // CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr) -> i32 // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ARG0]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})> -// CHECK: %[[RANK:.*]] = llvm.load %[[GEP]] : !llvm.ptr -> i32 +// CHECK: %[[RAW_RANK:.*]] = llvm.load %[[GEP]] : !llvm.ptr -> i8 +// CHECK: %[[RANK:.*]] = llvm.sext %[[RAW_RANK]] : i8 to i32 // CHECK: llvm.return %[[RANK]] : i32 // ----- @@ -1009,9 +1010,9 @@ func.func @box_isarray(%arg0: !fir.box>) -> i1 { // CHECK-LABEL: llvm.func @box_isarray( // CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr) -> i1 // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ARG0]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})> -// CHECK: %[[RANK:.*]] = llvm.load %[[GEP]] : !llvm.ptr -> i32 -// CHECK: %[[C0_ISARRAY:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[IS_ARRAY:.*]] = llvm.icmp "ne" %[[RANK]], %[[C0_ISARRAY]] : i32 +// CHECK: %[[RANK:.*]] = llvm.load %[[GEP]] : !llvm.ptr -> i8 +// CHECK: 
%[[C0_ISARRAY:.*]] = llvm.mlir.constant(0 : i64) : i8 +// CHECK: %[[IS_ARRAY:.*]] = llvm.icmp "ne" %[[RANK]], %[[C0_ISARRAY]] : i8 // CHECK: llvm.return %[[IS_ARRAY]] : i1 // ----- diff --git a/flang/test/Fir/tbaa.fir b/flang/test/Fir/tbaa.fir index 048f53f5c6e47..f4f23d35cba25 100644 --- a/flang/test/Fir/tbaa.fir +++ b/flang/test/Fir/tbaa.fir @@ -248,8 +248,9 @@ func.func @tbaa(%arg0: !fir.box>) -> i32 { // CHECK-LABEL: llvm.func @tbaa( // CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr) -> i32 { // CHECK: %[[VAL_1:.*]] = llvm.getelementptr %[[VAL_0]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> -// CHECK: %[[VAL_2:.*]] = llvm.load %[[VAL_1]] {tbaa = [#[[$BOXT]]]} : !llvm.ptr -> i32 -// CHECK: llvm.return %[[VAL_2]] : i32 +// CHECK: %[[VAL_2:.*]] = llvm.load %[[VAL_1]] {tbaa = [#[[$BOXT]]]} : !llvm.ptr -> i8 +// CHECK: %[[VAL_3:.*]] = llvm.sext %[[VAL_2]] : i8 to i32 +// CHECK: llvm.return %[[VAL_3]] : i32 // CHECK: } // ----- @@ -267,9 +268,9 @@ func.func @tbaa(%arg0: !fir.box>) -> i1 { // CHECK-LABEL: llvm.func @tbaa( // CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr) -> i1 { // CHECK: %[[VAL_1:.*]] = llvm.getelementptr %[[VAL_0]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> -// CHECK: %[[VAL_2:.*]] = llvm.load %[[VAL_1]] {tbaa = [#[[$BOXT]]]} : !llvm.ptr -> i32 -// CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[VAL_4:.*]] = llvm.icmp "ne" %[[VAL_2]], %[[VAL_3]] : i32 +// CHECK: %[[VAL_2:.*]] = llvm.load %[[VAL_1]] {tbaa = [#[[$BOXT]]]} : !llvm.ptr -> i8 +// CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(0 : i64) : i8 +// CHECK: %[[VAL_4:.*]] = llvm.icmp "ne" %[[VAL_2]], %[[VAL_3]] : i8 // CHECK: llvm.return %[[VAL_4]] : i1 // CHECK: } diff --git a/flang/test/HLFIR/declare-codegen.fir b/flang/test/HLFIR/declare-codegen.fir index 9f51d0fbc7afd..bd0d61a2559db 100644 --- a/flang/test/HLFIR/declare-codegen.fir +++ b/flang/test/HLFIR/declare-codegen.fir @@ -210,3 +210,12 @@ func.func 
@dummy_scope(%arg0: !fir.ref) { // CHECK-SAME: %[[VAL_0:.*]]: !fir.ref) { // CHECK: %[[SCOPE:.*]] = fir.dummy_scope : !fir.dscope // CHECK: %[[VAL_1:.*]] = fir.declare %[[VAL_0]] dummy_scope %[[SCOPE]] {uniq_name = "x"} : (!fir.ref, !fir.dscope) -> !fir.ref + +func.func @assumed_rank_declare(%arg0: !fir.box>) { + %0:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) + return +} +// CHECK-LABEL: func.func @assumed_rank_declare( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>) { +// CHECK: %[[VAL_1:.*]] = fir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box>) -> !fir.box> +// CHECK: %[[VAL_2:.*]] = fir.rebox_assumed_rank %[[VAL_1]] lbs ones : (!fir.box>) -> !fir.box> diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf new file mode 100644 index 0000000000000..0c71ea6efcd63 --- /dev/null +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -0,0 +1,36 @@ +! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s + +! Test CUDA Fortran procedures available in cudadevice module + +attributes(global) subroutine devsub() + implicit none + integer :: ret + + call syncthreads() + call syncwarp(1) + call threadfence() + call threadfence_block() + call threadfence_system() + ret = syncthreads_and(1) + ret = syncthreads_count(1) + ret = syncthreads_or(1) +end + +! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc} +! CHECK: fir.call @__syncthreads() +! CHECK: fir.call @__syncwarp(%{{.*}}) fastmath : (!fir.ref) -> () +! CHECK: fir.call @__threadfence() +! CHECK: fir.call @__threadfence_block() +! CHECK: fir.call @__threadfence_system() +! CHECK: %{{.*}} = fir.call @__syncthreads_and(%{{.*}}) fastmath : (!fir.ref) -> i32 +! CHECK: %{{.*}} = fir.call @__syncthreads_count(%{{.*}}) fastmath : (!fir.ref) -> i32 +! CHECK: %{{.*}} = fir.call @__syncthreads_or(%{{.*}}) fastmath : (!fir.ref) -> i32 + +! 
CHECK: func.func private @__syncthreads() attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncthreads"} +! CHECK: func.func private @__syncwarp(!fir.ref {cuf.data_attr = #cuf.cuda}) attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncwarp"} +! CHECK: func.func private @__threadfence() attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__threadfence"} +! CHECK: func.func private @__threadfence_block() attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__threadfence_block"} +! CHECK: func.func private @__threadfence_system() attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__threadfence_system"} +! CHECK: func.func private @__syncthreads_and(!fir.ref {cuf.data_attr = #cuf.cuda}) -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncthreads_and"} +! CHECK: func.func private @__syncthreads_count(!fir.ref {cuf.data_attr = #cuf.cuda}) -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncthreads_count"} +! CHECK: func.func private @__syncthreads_or(!fir.ref {cuf.data_attr = #cuf.cuda}) -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncthreads_or"} diff --git a/flang/test/Lower/HLFIR/convert-variable-assumed-rank.f90 b/flang/test/Lower/HLFIR/convert-variable-assumed-rank.f90 new file mode 100644 index 0000000000000..748c15be84496 --- /dev/null +++ b/flang/test/Lower/HLFIR/convert-variable-assumed-rank.f90 @@ -0,0 +1,70 @@ +! Test lowering of assumed-rank variables +! RUN: bbc -emit-hlfir %s -allow-assumed-rank -o - | FileCheck %s + +module assumed_rank_tests +interface +subroutine takes_real(x) + real :: x(..) +end subroutine +subroutine takes_char(x) + character(*) :: x(..) +end subroutine +end interface +contains + +subroutine test_intrinsic(x) + real :: x(..) + call takes_real(x) +end subroutine + +subroutine test_character_explicit_len(x, n) + integer(8) :: n + character(n) :: x(..) 
+ call takes_char(x) +end subroutine + +subroutine test_character_assumed_len(x) + character(*) :: x(..) + call takes_char(x) +end subroutine + +subroutine test_with_attrs(x) + real, target, optional :: x(..) + call takes_real(x) +end subroutine +! CHECK-LABEL: func.func @_QMassumed_rank_testsPtest_intrinsic( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "x"}) { +! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {uniq_name = "_QMassumed_rank_testsFtest_intrinsicEx"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) +! CHECK: fir.call @_QPtakes_real(%[[VAL_2]]#0) fastmath : (!fir.box>) -> () +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMassumed_rank_testsPtest_character_explicit_len( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box>> {fir.bindc_name = "x"}, +! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "n"}) { +! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_2]] {uniq_name = "_QMassumed_rank_testsFtest_character_explicit_lenEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref +! CHECK: %[[VAL_5:.*]] = arith.constant 0 : i64 +! CHECK: %[[VAL_6:.*]] = arith.cmpi sgt, %[[VAL_4]], %[[VAL_5]] : i64 +! CHECK: %[[VAL_7:.*]] = arith.select %[[VAL_6]], %[[VAL_4]], %[[VAL_5]] : i64 +! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_0]] typeparams %[[VAL_7]] dummy_scope %[[VAL_2]] {uniq_name = "_QMassumed_rank_testsFtest_character_explicit_lenEx"} : (!fir.box>>, i64, !fir.dscope) -> (!fir.box>>, !fir.box>>) +! CHECK: fir.call @_QPtakes_char(%[[VAL_8]]#0) fastmath : (!fir.box>>) -> () +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMassumed_rank_testsPtest_character_assumed_len( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box>> {fir.bindc_name = "x"}) { +! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope +! 
CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {uniq_name = "_QMassumed_rank_testsFtest_character_assumed_lenEx"} : (!fir.box>>, !fir.dscope) -> (!fir.box>>, !fir.box>>) +! CHECK: fir.call @_QPtakes_char(%[[VAL_2]]#0) fastmath : (!fir.box>>) -> () +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMassumed_rank_testsPtest_with_attrs( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "x", fir.optional, fir.target}) { +! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QMassumed_rank_testsFtest_with_attrsEx"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) +! CHECK: fir.call @_QPtakes_real(%[[VAL_2]]#0) fastmath : (!fir.box>) -> () +end module diff --git a/flang/test/Lower/OpenMP/allocatable-multiple-vars.f90 b/flang/test/Lower/OpenMP/allocatable-multiple-vars.f90 new file mode 100644 index 0000000000000..e6450a13e13a0 --- /dev/null +++ b/flang/test/Lower/OpenMP/allocatable-multiple-vars.f90 @@ -0,0 +1,28 @@ +! Test early privatization for multiple allocatable variables. + +! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization=false \ +! RUN: -o - %s 2>&1 | FileCheck %s + +! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization=false -o - %s 2>&1 |\ +! RUN: FileCheck %s + +subroutine delayed_privatization_allocatable + implicit none + integer, allocatable :: var1, var2 + +!$omp parallel private(var1, var2) + var1 = 10 + var2 = 20 +!$omp end parallel +end subroutine + +! Verify that private versions of each variable are both allocated and freed +! within the parallel region. + +! CHECK: omp.parallel { +! CHECK: fir.allocmem +! CHECK: fir.allocmem +! CHECK: fir.freemem +! CHECK: fir.freemem +! CHECK: omp.terminator +! 
CHECK-NEXT: } diff --git a/flang/test/Lower/PowerPC/ppc-vec-load.f90 b/flang/test/Lower/PowerPC/ppc-vec-load.f90 index 4d51512df0f7b..a81ed055ce08c 100644 --- a/flang/test/Lower/PowerPC/ppc-vec-load.f90 +++ b/flang/test/Lower/PowerPC/ppc-vec-load.f90 @@ -1,12 +1,13 @@ -! RUN: %flang_fc1 -flang-experimental-hlfir -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR","LLVMIR-LE" %s -! RUN: %flang_fc1 -flang-experimental-hlfir -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR","LLVMIR-BE" %s +! RUN: %flang_fc1 -flang-experimental-hlfir -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR","LLVMIR-LE","LLVM" %s +! RUN: %flang_fc1 -triple powerpc64le-unknown-unknown -target-cpu pwr9 -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR","LLVMIR_P9","LLVM" %s +! RUN: %flang_fc1 -flang-experimental-hlfir -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR","LLVMIR-BE","LLVM" %s ! REQUIRES: target=powerpc{{.*}} !---------------------- ! vec_ld !---------------------- -! CHECK-LABEL: @vec_ld_testi8 +! LLVM-LABEL: @vec_ld_testi8 subroutine vec_ld_testi8(arg1, arg2, res) integer(1) :: arg1 vector(integer(1)) :: arg2, res @@ -19,7 +20,7 @@ subroutine vec_ld_testi8(arg1, arg2, res) ! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16 end subroutine vec_ld_testi8 -! CHECK-LABEL: @vec_ld_testi16 +! LLVM-LABEL: @vec_ld_testi16 subroutine vec_ld_testi16(arg1, arg2, res) integer(2) :: arg1 vector(integer(2)) :: arg2, res @@ -32,7 +33,7 @@ subroutine vec_ld_testi16(arg1, arg2, res) ! LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16 end subroutine vec_ld_testi16 -! CHECK-LABEL: @vec_ld_testi32 +! LLVM-LABEL: @vec_ld_testi32 subroutine vec_ld_testi32(arg1, arg2, res) integer(4) :: arg1 vector(integer(4)) :: arg2, res @@ -44,7 +45,7 @@ subroutine vec_ld_testi32(arg1, arg2, res) ! 
LLVMIR: store <4 x i32> %[[bc]], ptr %2, align 16 end subroutine vec_ld_testi32 -! CHECK-LABEL: @vec_ld_testf32 +! LLVM-LABEL: @vec_ld_testf32 subroutine vec_ld_testf32(arg1, arg2, res) integer(8) :: arg1 vector(real(4)) :: arg2, res @@ -58,7 +59,7 @@ subroutine vec_ld_testf32(arg1, arg2, res) ! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 end subroutine vec_ld_testf32 -! CHECK-LABEL: @vec_ld_testu32 +! LLVM-LABEL: @vec_ld_testu32 subroutine vec_ld_testu32(arg1, arg2, res) integer(1) :: arg1 vector(unsigned(4)) :: arg2, res @@ -70,7 +71,7 @@ subroutine vec_ld_testu32(arg1, arg2, res) ! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 end subroutine vec_ld_testu32 -! CHECK-LABEL: @vec_ld_testi32a +! LLVM-LABEL: @vec_ld_testi32a subroutine vec_ld_testi32a(arg1, arg2, res) integer(4) :: arg1 integer(4) :: arg2(10) @@ -83,7 +84,7 @@ subroutine vec_ld_testi32a(arg1, arg2, res) ! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 end subroutine vec_ld_testi32a -! CHECK-LABEL: @vec_ld_testf32av +! LLVM-LABEL: @vec_ld_testf32av subroutine vec_ld_testf32av(arg1, arg2, res) integer(8) :: arg1 vector(real(4)) :: arg2(2, 4, 8) @@ -98,7 +99,7 @@ subroutine vec_ld_testf32av(arg1, arg2, res) ! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 end subroutine vec_ld_testf32av -! CHECK-LABEL: @vec_ld_testi32s +! LLVM-LABEL: @vec_ld_testi32s subroutine vec_ld_testi32s(arg1, arg2, res) integer(4) :: arg1 real(4) :: arg2 @@ -116,7 +117,7 @@ end subroutine vec_ld_testi32s ! vec_lde !---------------------- -! CHECK-LABEL: @vec_lde_testi8s +! LLVM-LABEL: @vec_lde_testi8s subroutine vec_lde_testi8s(arg1, arg2, res) integer(1) :: arg1 integer(1) :: arg2 @@ -129,7 +130,7 @@ subroutine vec_lde_testi8s(arg1, arg2, res) ! LLVMIR: store <16 x i8> %[[call]], ptr %2, align 16 end subroutine vec_lde_testi8s -! CHECK-LABEL: @vec_lde_testi16a +! 
LLVM-LABEL: @vec_lde_testi16a subroutine vec_lde_testi16a(arg1, arg2, res) integer(2) :: arg1 integer(2) :: arg2(2, 4, 8) @@ -142,7 +143,7 @@ subroutine vec_lde_testi16a(arg1, arg2, res) ! LLVMIR: store <8 x i16> %[[call]], ptr %2, align 16 end subroutine vec_lde_testi16a -! CHECK-LABEL: @vec_lde_testi32a +! LLVM-LABEL: @vec_lde_testi32a subroutine vec_lde_testi32a(arg1, arg2, res) integer(4) :: arg1 integer(4) :: arg2(4) @@ -155,7 +156,7 @@ subroutine vec_lde_testi32a(arg1, arg2, res) ! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 end subroutine vec_lde_testi32a -! CHECK-LABEL: @vec_lde_testf32a +! LLVM-LABEL: @vec_lde_testf32a subroutine vec_lde_testf32a(arg1, arg2, res) integer(8) :: arg1 real(4) :: arg2(4) @@ -173,7 +174,7 @@ end subroutine vec_lde_testf32a ! vec_ldl !---------------------- -! CHECK-LABEL: @vec_ldl_testi8 +! LLVM-LABEL: @vec_ldl_testi8 subroutine vec_ldl_testi8(arg1, arg2, res) integer(1) :: arg1 vector(integer(1)) :: arg2, res @@ -186,7 +187,7 @@ subroutine vec_ldl_testi8(arg1, arg2, res) ! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16 end subroutine vec_ldl_testi8 -! CHECK-LABEL: @vec_ldl_testi16 +! LLVM-LABEL: @vec_ldl_testi16 subroutine vec_ldl_testi16(arg1, arg2, res) integer(2) :: arg1 vector(integer(2)) :: arg2, res @@ -199,7 +200,7 @@ subroutine vec_ldl_testi16(arg1, arg2, res) ! LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16 end subroutine vec_ldl_testi16 -! CHECK-LABEL: @vec_ldl_testi32 +! LLVM-LABEL: @vec_ldl_testi32 subroutine vec_ldl_testi32(arg1, arg2, res) integer(4) :: arg1 vector(integer(4)) :: arg2, res @@ -211,7 +212,7 @@ subroutine vec_ldl_testi32(arg1, arg2, res) ! LLVMIR: store <4 x i32> %[[bc]], ptr %2, align 16 end subroutine vec_ldl_testi32 -! CHECK-LABEL: @vec_ldl_testf32 +! LLVM-LABEL: @vec_ldl_testf32 subroutine vec_ldl_testf32(arg1, arg2, res) integer(8) :: arg1 vector(real(4)) :: arg2, res @@ -225,7 +226,7 @@ subroutine vec_ldl_testf32(arg1, arg2, res) ! 
LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 end subroutine vec_ldl_testf32 -! CHECK-LABEL: @vec_ldl_testu32 +! LLVM-LABEL: @vec_ldl_testu32 subroutine vec_ldl_testu32(arg1, arg2, res) integer(1) :: arg1 vector(unsigned(4)) :: arg2, res @@ -237,7 +238,7 @@ subroutine vec_ldl_testu32(arg1, arg2, res) ! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 end subroutine vec_ldl_testu32 -! CHECK-LABEL: @vec_ldl_testi32a +! LLVM-LABEL: @vec_ldl_testi32a subroutine vec_ldl_testi32a(arg1, arg2, res) integer(4) :: arg1 integer(4) :: arg2(10) @@ -250,7 +251,7 @@ subroutine vec_ldl_testi32a(arg1, arg2, res) ! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 end subroutine vec_ldl_testi32a -! CHECK-LABEL: @vec_ldl_testf32av +! LLVM-LABEL: @vec_ldl_testf32av subroutine vec_ldl_testf32av(arg1, arg2, res) integer(8) :: arg1 vector(real(4)) :: arg2(2, 4, 8) @@ -264,7 +265,7 @@ subroutine vec_ldl_testf32av(arg1, arg2, res) ! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 end subroutine vec_ldl_testf32av -! CHECK-LABEL: @vec_ldl_testi32s +! LLVM-LABEL: @vec_ldl_testi32s subroutine vec_ldl_testi32s(arg1, arg2, res) integer(4) :: arg1 real(4) :: arg2 @@ -282,7 +283,7 @@ end subroutine vec_ldl_testi32s ! vec_lvsl !---------------------- -! CHECK-LABEL: @vec_lvsl_testi8s +! LLVM-LABEL: @vec_lvsl_testi8s subroutine vec_lvsl_testi8s(arg1, arg2, res) integer(1) :: arg1 integer(1) :: arg2 @@ -300,7 +301,7 @@ subroutine vec_lvsl_testi8s(arg1, arg2, res) ! LLVMIR-BE: store <16 x i8> %[[ld]], ptr %2, align 16 end subroutine vec_lvsl_testi8s -! CHECK-LABEL: @vec_lvsl_testi16a +! LLVM-LABEL: @vec_lvsl_testi16a subroutine vec_lvsl_testi16a(arg1, arg2, res) integer(2) :: arg1 integer(2) :: arg2(4) @@ -318,7 +319,7 @@ subroutine vec_lvsl_testi16a(arg1, arg2, res) ! LLVMIR-BE: store <16 x i8> %[[ld]], ptr %2, align 16 end subroutine vec_lvsl_testi16a -! CHECK-LABEL: @vec_lvsl_testi32a +! 
LLVM-LABEL: @vec_lvsl_testi32a subroutine vec_lvsl_testi32a(arg1, arg2, res) integer(4) :: arg1 integer(4) :: arg2(2, 3, 4) @@ -336,7 +337,7 @@ subroutine vec_lvsl_testi32a(arg1, arg2, res) ! LLVMIR-BE: store <16 x i8> %[[ld]], ptr %2, align 16 end subroutine vec_lvsl_testi32a -! CHECK-LABEL: @vec_lvsl_testf32a +! LLVM-LABEL: @vec_lvsl_testf32a subroutine vec_lvsl_testf32a(arg1, arg2, res) integer(8) :: arg1 real(4) :: arg2(4) @@ -357,7 +358,7 @@ end subroutine vec_lvsl_testf32a ! vec_lvsr !---------------------- -! CHECK-LABEL: @vec_lvsr_testi8s +! LLVM-LABEL: @vec_lvsr_testi8s subroutine vec_lvsr_testi8s(arg1, arg2, res) integer(1) :: arg1 integer(1) :: arg2 @@ -375,7 +376,7 @@ subroutine vec_lvsr_testi8s(arg1, arg2, res) ! LLVMIR-BE: store <16 x i8> %[[addr]], ptr %2, align 16 end subroutine vec_lvsr_testi8s -! CHECK-LABEL: @vec_lvsr_testi16a +! LLVM-LABEL: @vec_lvsr_testi16a subroutine vec_lvsr_testi16a(arg1, arg2, res) integer(2) :: arg1 integer(2) :: arg2(4) @@ -393,7 +394,7 @@ subroutine vec_lvsr_testi16a(arg1, arg2, res) ! LLVMIR-BE: store <16 x i8> %[[addr]], ptr %2, align 16 end subroutine vec_lvsr_testi16a -! CHECK-LABEL: @vec_lvsr_testi32a +! LLVM-LABEL: @vec_lvsr_testi32a subroutine vec_lvsr_testi32a(arg1, arg2, res) integer(4) :: arg1 integer(4) :: arg2(2, 3, 4) @@ -411,7 +412,7 @@ subroutine vec_lvsr_testi32a(arg1, arg2, res) ! LLVMIR-BE: store <16 x i8> %[[addr]], ptr %2, align 16 end subroutine vec_lvsr_testi32a -! CHECK-LABEL: @vec_lvsr_testf32a +! LLVM-LABEL: @vec_lvsr_testf32a subroutine vec_lvsr_testf32a(arg1, arg2, res) integer(8) :: arg1 real(4) :: arg2(4) @@ -432,7 +433,7 @@ end subroutine vec_lvsr_testf32a ! vec_lxv !---------------------- -! CHECK-LABEL: @vec_lxv_testi8a +! LLVM-LABEL: @vec_lxv_testi8a subroutine vec_lxv_testi8a(arg1, arg2, res) integer(1) :: arg1 integer(1) :: arg2(4) @@ -445,7 +446,7 @@ subroutine vec_lxv_testi8a(arg1, arg2, res) ! LLVMIR_P9: store <16 x i8> %[[ld]], ptr %2, align 16 end subroutine vec_lxv_testi8a -! 
CHECK-LABEL: @vec_lxv_testi16a +! LLVM-LABEL: @vec_lxv_testi16a subroutine vec_lxv_testi16a(arg1, arg2, res) integer(2) :: arg1 integer(2) :: arg2(2, 4, 8) @@ -458,7 +459,7 @@ subroutine vec_lxv_testi16a(arg1, arg2, res) ! LLVMIR_P9: store <8 x i16> %[[ld]], ptr %2, align 16 end subroutine vec_lxv_testi16a -! CHECK-LABEL: @vec_lxv_testi32a +! LLVM-LABEL: @vec_lxv_testi32a subroutine vec_lxv_testi32a(arg1, arg2, res) integer(4) :: arg1 integer(4) :: arg2(2, 4, 8) @@ -471,7 +472,7 @@ subroutine vec_lxv_testi32a(arg1, arg2, res) ! LLVMIR_P9: store <4 x i32> %[[ld]], ptr %2, align 16 end subroutine vec_lxv_testi32a -! CHECK-LABEL: @vec_lxv_testf32a +! LLVM-LABEL: @vec_lxv_testf32a subroutine vec_lxv_testf32a(arg1, arg2, res) integer(2) :: arg1 real(4) :: arg2(4) @@ -484,7 +485,7 @@ subroutine vec_lxv_testf32a(arg1, arg2, res) ! LLVMIR_P9: store <4 x float> %[[ld]], ptr %2, align 16 end subroutine vec_lxv_testf32a -! CHECK-LABEL: @vec_lxv_testf64a +! LLVM-LABEL: @vec_lxv_testf64a subroutine vec_lxv_testf64a(arg1, arg2, res) integer(8) :: arg1 real(8) :: arg2(4) @@ -501,7 +502,7 @@ end subroutine vec_lxv_testf64a ! vec_xld2 !---------------------- -! CHECK-LABEL: @vec_xld2_testi8a +! LLVM-LABEL: @vec_xld2_testi8a subroutine vec_xld2_testi8a(arg1, arg2, res) integer(1) :: arg1 vector(integer(1)) :: arg2(4) @@ -515,7 +516,7 @@ subroutine vec_xld2_testi8a(arg1, arg2, res) ! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16 end subroutine vec_xld2_testi8a -! CHECK-LABEL: @vec_xld2_testi16 +! LLVM-LABEL: @vec_xld2_testi16 subroutine vec_xld2_testi16(arg1, arg2, res) integer :: arg1 vector(integer(2)) :: arg2 @@ -529,7 +530,7 @@ subroutine vec_xld2_testi16(arg1, arg2, res) ! LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16 end subroutine vec_xld2_testi16 -! CHECK-LABEL: @vec_xld2_testi32a +! 
LLVM-LABEL: @vec_xld2_testi32a subroutine vec_xld2_testi32a(arg1, arg2, res) integer(4) :: arg1 vector(integer(4)) :: arg2(41) @@ -543,7 +544,7 @@ subroutine vec_xld2_testi32a(arg1, arg2, res) ! LLVMIR: store <4 x i32> %[[bc]], ptr %2, align 16 end subroutine vec_xld2_testi32a -! CHECK-LABEL: @vec_xld2_testi64a +! LLVM-LABEL: @vec_xld2_testi64a subroutine vec_xld2_testi64a(arg1, arg2, res) integer(8) :: arg1 vector(integer(8)) :: arg2(4) @@ -557,7 +558,7 @@ subroutine vec_xld2_testi64a(arg1, arg2, res) ! LLVMIR: store <2 x i64> %[[bc]], ptr %2, align 16 end subroutine vec_xld2_testi64a -! CHECK-LABEL: @vec_xld2_testf32a +! LLVM-LABEL: @vec_xld2_testf32a subroutine vec_xld2_testf32a(arg1, arg2, res) integer(2) :: arg1 vector(real(4)) :: arg2(4) @@ -571,7 +572,7 @@ subroutine vec_xld2_testf32a(arg1, arg2, res) ! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 end subroutine vec_xld2_testf32a -! CHECK-LABEL: @vec_xld2_testf64a +! LLVM-LABEL: @vec_xld2_testf64a subroutine vec_xld2_testf64a(arg1, arg2, res) integer(8) :: arg1 vector(real(8)) :: arg2(4) @@ -588,7 +589,7 @@ end subroutine vec_xld2_testf64a ! vec_xl !---------------------- -! CHECK-LABEL: @vec_xl_testi8a +! LLVM-LABEL: @vec_xl_testi8a subroutine vec_xl_testi8a(arg1, arg2, res) integer(1) :: arg1 integer(1) :: arg2(4) @@ -601,7 +602,7 @@ subroutine vec_xl_testi8a(arg1, arg2, res) ! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16 end subroutine vec_xl_testi8a -! CHECK-LABEL: @vec_xl_testi16a +! LLVM-LABEL: @vec_xl_testi16a subroutine vec_xl_testi16a(arg1, arg2, res) integer(2) :: arg1 integer(2) :: arg2(2, 4, 8) @@ -614,7 +615,7 @@ subroutine vec_xl_testi16a(arg1, arg2, res) ! LLVMIR: store <8 x i16> %[[ld]], ptr %2, align 16 end subroutine vec_xl_testi16a -! CHECK-LABEL: @vec_xl_testi32a +! LLVM-LABEL: @vec_xl_testi32a subroutine vec_xl_testi32a(arg1, arg2, res) integer(4) :: arg1 integer(4) :: arg2(2, 4, 8) @@ -627,7 +628,7 @@ subroutine vec_xl_testi32a(arg1, arg2, res) ! 
LLVMIR: store <4 x i32> %[[ld]], ptr %2, align 16 end subroutine vec_xl_testi32a -! CHECK-LABEL: @vec_xl_testi64a +! LLVM-LABEL: @vec_xl_testi64a subroutine vec_xl_testi64a(arg1, arg2, res) integer(8) :: arg1 integer(8) :: arg2(2, 4, 8) @@ -641,7 +642,7 @@ subroutine vec_xl_testi64a(arg1, arg2, res) ! LLVMIR: store <2 x i64> %[[bc]], ptr %2, align 16 end subroutine vec_xl_testi64a -! CHECK-LABEL: @vec_xl_testf32a +! LLVM-LABEL: @vec_xl_testf32a subroutine vec_xl_testf32a(arg1, arg2, res) integer(2) :: arg1 real(4) :: arg2(4) @@ -655,7 +656,7 @@ subroutine vec_xl_testf32a(arg1, arg2, res) ! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 end subroutine vec_xl_testf32a -! CHECK-LABEL: @vec_xl_testf64a +! LLVM-LABEL: @vec_xl_testf64a subroutine vec_xl_testf64a(arg1, arg2, res) integer(8) :: arg1 real(8) :: arg2 @@ -672,7 +673,7 @@ end subroutine vec_xl_testf64a ! vec_xlds !---------------------- -! CHECK-LABEL: @vec_xlds_testi64a +! LLVM-LABEL: @vec_xlds_testi64a subroutine vec_xlds_testi64a(arg1, arg2, res) integer(8) :: arg1 vector(integer(8)) :: arg2(4) @@ -687,7 +688,7 @@ subroutine vec_xlds_testi64a(arg1, arg2, res) ! LLVMIR: store <2 x i64> %[[shfl]], ptr %2, align 16 end subroutine vec_xlds_testi64a -! CHECK-LABEL: @vec_xlds_testf64a +! LLVM-LABEL: @vec_xlds_testf64a subroutine vec_xlds_testf64a(arg1, arg2, res) integer(8) :: arg1 vector(real(8)) :: arg2(4) @@ -707,7 +708,7 @@ end subroutine vec_xlds_testf64a ! vec_xl_be !---------------------- -! CHECK-LABEL: @vec_xl_be_testi8a +! LLVM-LABEL: @vec_xl_be_testi8a subroutine vec_xl_be_testi8a(arg1, arg2, res) integer(1) :: arg1 integer(1) :: arg2(2, 4, 8) @@ -722,7 +723,7 @@ subroutine vec_xl_be_testi8a(arg1, arg2, res) ! LLVMIR-BE: store <16 x i8> %[[ld]], ptr %2, align 16 end subroutine vec_xl_be_testi8a -! CHECK-LABEL: @vec_xl_be_testi16a +! 
LLVM-LABEL: @vec_xl_be_testi16a subroutine vec_xl_be_testi16a(arg1, arg2, res) integer(2) :: arg1 integer(2) :: arg2(2, 4, 8) @@ -737,7 +738,7 @@ subroutine vec_xl_be_testi16a(arg1, arg2, res) ! LLVMIR-BE: store <8 x i16> %[[ld]], ptr %2, align 16 end subroutine vec_xl_be_testi16a -! CHECK-LABEL: @vec_xl_be_testi32a +! LLVM-LABEL: @vec_xl_be_testi32a subroutine vec_xl_be_testi32a(arg1, arg2, res) integer(4) :: arg1 integer(4) :: arg2(2, 4, 8) @@ -752,7 +753,7 @@ subroutine vec_xl_be_testi32a(arg1, arg2, res) ! LLVMIR-BE: store <4 x i32> %[[ld]], ptr %2, align 16 end subroutine vec_xl_be_testi32a -! CHECK-LABEL: @vec_xl_be_testi64a +! LLVM-LABEL: @vec_xl_be_testi64a subroutine vec_xl_be_testi64a(arg1, arg2, res) integer(8) :: arg1 integer(8) :: arg2(2, 4, 8) @@ -767,7 +768,7 @@ subroutine vec_xl_be_testi64a(arg1, arg2, res) ! LLVMIR-BE: store <2 x i64> %[[ld]], ptr %2, align 16 end subroutine vec_xl_be_testi64a -! CHECK-LABEL: @vec_xl_be_testf32a +! LLVM-LABEL: @vec_xl_be_testf32a subroutine vec_xl_be_testf32a(arg1, arg2, res) integer(2) :: arg1 real(4) :: arg2(4) @@ -782,7 +783,7 @@ subroutine vec_xl_be_testf32a(arg1, arg2, res) ! LLVMIR-BE: store <4 x float> %[[ld]], ptr %2, align 16 end subroutine vec_xl_be_testf32a -! CHECK-LABEL: @vec_xl_be_testf64a +! LLVM-LABEL: @vec_xl_be_testf64a subroutine vec_xl_be_testf64a(arg1, arg2, res) integer(8) :: arg1 real(8) :: arg2(7) @@ -801,7 +802,7 @@ end subroutine vec_xl_be_testf64a ! vec_xlw4 !---------------------- -! CHECK-LABEL: @vec_xlw4_testi8a +! LLVM-LABEL: @vec_xlw4_testi8a subroutine vec_xlw4_testi8a(arg1, arg2, res) integer(1) :: arg1 vector(integer(1)) :: arg2(2, 4, 8) @@ -815,7 +816,7 @@ subroutine vec_xlw4_testi8a(arg1, arg2, res) ! LLVMIR: store <16 x i8> %[[res]], ptr %2, align 16 end subroutine vec_xlw4_testi8a -! CHECK-LABEL: @vec_xlw4_testi16a +! 
LLVM-LABEL: @vec_xlw4_testi16a subroutine vec_xlw4_testi16a(arg1, arg2, res) integer(2) :: arg1 vector(integer(2)) :: arg2(2, 4, 8) @@ -829,7 +830,7 @@ subroutine vec_xlw4_testi16a(arg1, arg2, res) ! LLVMIR: store <8 x i16> %[[res]], ptr %2, align 16 end subroutine vec_xlw4_testi16a -! CHECK-LABEL: @vec_xlw4_testu32a +! LLVM-LABEL: @vec_xlw4_testu32a subroutine vec_xlw4_testu32a(arg1, arg2, res) integer(4) :: arg1 vector(unsigned(4)) :: arg2(2, 4, 8) @@ -842,7 +843,7 @@ subroutine vec_xlw4_testu32a(arg1, arg2, res) ! LLVMIR: store <4 x i32> %[[ld]], ptr %2, align 16 end subroutine vec_xlw4_testu32a -! CHECK-LABEL: @vec_xlw4_testf32a +! LLVM-LABEL: @vec_xlw4_testf32a subroutine vec_xlw4_testf32a(arg1, arg2, res) integer(2) :: arg1 vector(real(4)) :: arg2(4) diff --git a/flang/test/Lower/PowerPC/ppc-vec-shift-be-le.f90 b/flang/test/Lower/PowerPC/ppc-vec-shift-be-le.f90 index bd83f28b4eeb5..6c4f202f89a45 100644 --- a/flang/test/Lower/PowerPC/ppc-vec-shift-be-le.f90 +++ b/flang/test/Lower/PowerPC/ppc-vec-shift-be-le.f90 @@ -1,13 +1,13 @@ -! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="CHECK" %s +! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="LLVMIR","LLVM" %s ! -! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -triple ppc64-unknown-aix -o - | FileCheck --check-prefixes="BE-LLVMIR" %s +! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -triple ppc64-unknown-aix -o - | FileCheck --check-prefixes="BE-LLVMIR","LLVM" %s ! REQUIRES: target=powerpc{{.*}} !---------------------- ! vec_sld !---------------------- -! CHECK-LABEL: vec_sld_test_i1i1 +! LLVM-LABEL: vec_sld_test_i1i1 subroutine vec_sld_test_i1i1(arg1, arg2) vector(integer(1)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_1) @@ -23,7 +23,7 @@ subroutine vec_sld_test_i1i1(arg1, arg2) ! 
BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i1i1 -! CHECK-LABEL: vec_sld_test_i1i2 +! LLVM-LABEL: vec_sld_test_i1i2 subroutine vec_sld_test_i1i2(arg1, arg2) vector(integer(1)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_2) @@ -39,7 +39,7 @@ subroutine vec_sld_test_i1i2(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i1i2 -! CHECK-LABEL: vec_sld_test_i1i4 +! LLVM-LABEL: vec_sld_test_i1i4 subroutine vec_sld_test_i1i4(arg1, arg2) vector(integer(1)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_4) @@ -55,7 +55,7 @@ subroutine vec_sld_test_i1i4(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i1i4 -! CHECK-LABEL: vec_sld_test_i1i8 +! LLVM-LABEL: vec_sld_test_i1i8 subroutine vec_sld_test_i1i8(arg1, arg2) vector(integer(1)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_8) @@ -71,7 +71,7 @@ subroutine vec_sld_test_i1i8(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i1i8 -! CHECK-LABEL: vec_sld_test_i2i1 +! LLVM-LABEL: vec_sld_test_i2i1 subroutine vec_sld_test_i2i1(arg1, arg2) vector(integer(2)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_1) @@ -93,7 +93,7 @@ subroutine vec_sld_test_i2i1(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i2i1 -! CHECK-LABEL: vec_sld_test_i2i2 +! LLVM-LABEL: vec_sld_test_i2i2 subroutine vec_sld_test_i2i2(arg1, arg2) vector(integer(2)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 8_2) @@ -115,7 +115,7 @@ subroutine vec_sld_test_i2i2(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i2i2 -! CHECK-LABEL: vec_sld_test_i2i4 +! LLVM-LABEL: vec_sld_test_i2i4 subroutine vec_sld_test_i2i4(arg1, arg2) vector(integer(2)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_4) @@ -137,7 +137,7 @@ subroutine vec_sld_test_i2i4(arg1, arg2) ! 
BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i2i4 -! CHECK-LABEL: vec_sld_test_i2i8 +! LLVM-LABEL: vec_sld_test_i2i8 subroutine vec_sld_test_i2i8(arg1, arg2) vector(integer(2)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 11_8) @@ -159,7 +159,7 @@ subroutine vec_sld_test_i2i8(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i2i8 -! CHECK-LABEL: vec_sld_test_i4i1 +! LLVM-LABEL: vec_sld_test_i4i1 subroutine vec_sld_test_i4i1(arg1, arg2) vector(integer(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_1) @@ -181,7 +181,7 @@ subroutine vec_sld_test_i4i1(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i4i1 -! CHECK-LABEL: vec_sld_test_i4i2 +! LLVM-LABEL: vec_sld_test_i4i2 subroutine vec_sld_test_i4i2(arg1, arg2) vector(integer(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_2) @@ -203,7 +203,7 @@ subroutine vec_sld_test_i4i2(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i4i2 -! CHECK-LABEL: vec_sld_test_i4i4 +! LLVM-LABEL: vec_sld_test_i4i4 subroutine vec_sld_test_i4i4(arg1, arg2) vector(integer(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_4) @@ -225,7 +225,7 @@ subroutine vec_sld_test_i4i4(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i4i4 -! CHECK-LABEL: vec_sld_test_i4i8 +! LLVM-LABEL: vec_sld_test_i4i8 subroutine vec_sld_test_i4i8(arg1, arg2) vector(integer(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_8) @@ -247,7 +247,7 @@ subroutine vec_sld_test_i4i8(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i4i8 -! CHECK-LABEL: vec_sld_test_u1i1 +! LLVM-LABEL: vec_sld_test_u1i1 subroutine vec_sld_test_u1i1(arg1, arg2) vector(unsigned(1)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_1) @@ -263,7 +263,7 @@ subroutine vec_sld_test_u1i1(arg1, arg2) ! 
BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u1i1 -! CHECK-LABEL: vec_sld_test_u1i2 +! LLVM-LABEL: vec_sld_test_u1i2 subroutine vec_sld_test_u1i2(arg1, arg2) vector(unsigned(1)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_2) @@ -279,7 +279,7 @@ subroutine vec_sld_test_u1i2(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u1i2 -! CHECK-LABEL: vec_sld_test_u1i4 +! LLVM-LABEL: vec_sld_test_u1i4 subroutine vec_sld_test_u1i4(arg1, arg2) vector(unsigned(1)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_1) @@ -295,7 +295,7 @@ subroutine vec_sld_test_u1i4(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u1i4 -! CHECK-LABEL: vec_sld_test_u1i8 +! LLVM-LABEL: vec_sld_test_u1i8 subroutine vec_sld_test_u1i8(arg1, arg2) vector(unsigned(1)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_1) @@ -311,7 +311,7 @@ subroutine vec_sld_test_u1i8(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u1i8 -! CHECK-LABEL: vec_sld_test_u2i1 +! LLVM-LABEL: vec_sld_test_u2i1 subroutine vec_sld_test_u2i1(arg1, arg2) vector(unsigned(2)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_1) @@ -333,7 +333,7 @@ subroutine vec_sld_test_u2i1(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u2i1 -! CHECK-LABEL: vec_sld_test_u2i2 +! LLVM-LABEL: vec_sld_test_u2i2 subroutine vec_sld_test_u2i2(arg1, arg2) vector(unsigned(2)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_2) @@ -355,7 +355,7 @@ subroutine vec_sld_test_u2i2(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u2i2 -! CHECK-LABEL: vec_sld_test_u2i4 +! LLVM-LABEL: vec_sld_test_u2i4 subroutine vec_sld_test_u2i4(arg1, arg2) vector(unsigned(2)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_4) @@ -377,7 +377,7 @@ subroutine vec_sld_test_u2i4(arg1, arg2) ! 
BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u2i4 -! CHECK-LABEL: vec_sld_test_u2i8 +! LLVM-LABEL: vec_sld_test_u2i8 subroutine vec_sld_test_u2i8(arg1, arg2) vector(unsigned(2)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_8) @@ -399,7 +399,7 @@ subroutine vec_sld_test_u2i8(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u2i8 -! CHECK-LABEL: vec_sld_test_u4i1 +! LLVM-LABEL: vec_sld_test_u4i1 subroutine vec_sld_test_u4i1(arg1, arg2) vector(unsigned(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_1) @@ -421,7 +421,7 @@ subroutine vec_sld_test_u4i1(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u4i1 -! CHECK-LABEL: vec_sld_test_u4i2 +! LLVM-LABEL: vec_sld_test_u4i2 subroutine vec_sld_test_u4i2(arg1, arg2) vector(unsigned(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_2) @@ -443,7 +443,7 @@ subroutine vec_sld_test_u4i2(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u4i2 -! CHECK-LABEL: vec_sld_test_u4i4 +! LLVM-LABEL: vec_sld_test_u4i4 subroutine vec_sld_test_u4i4(arg1, arg2) vector(unsigned(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_4) @@ -465,7 +465,7 @@ subroutine vec_sld_test_u4i4(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u4i4 -! CHECK-LABEL: vec_sld_test_u4i8 +! LLVM-LABEL: vec_sld_test_u4i8 subroutine vec_sld_test_u4i8(arg1, arg2) vector(unsigned(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_8) @@ -487,7 +487,7 @@ subroutine vec_sld_test_u4i8(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u4i8 -! CHECK-LABEL: vec_sld_test_r4i1 +! LLVM-LABEL: vec_sld_test_r4i1 subroutine vec_sld_test_r4i1(arg1, arg2) vector(real(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_1) @@ -509,7 +509,7 @@ subroutine vec_sld_test_r4i1(arg1, arg2) ! 
BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_r4i1 -! CHECK-LABEL: vec_sld_test_r4i2 +! LLVM-LABEL: vec_sld_test_r4i2 subroutine vec_sld_test_r4i2(arg1, arg2) vector(real(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_2) @@ -531,7 +531,7 @@ subroutine vec_sld_test_r4i2(arg1, arg2) ! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_r4i2 -! CHECK-LABEL: vec_sld_test_r4i4 +! LLVM-LABEL: vec_sld_test_r4i4 subroutine vec_sld_test_r4i4(arg1, arg2) vector(real(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_4) @@ -553,7 +553,7 @@ subroutine vec_sld_test_r4i4(arg1, arg2) ! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_r4i4 -! CHECK-LABEL: vec_sld_test_r4i8 +! LLVM-LABEL: vec_sld_test_r4i8 subroutine vec_sld_test_r4i8(arg1, arg2) vector(real(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 1_8) @@ -578,7 +578,7 @@ end subroutine vec_sld_test_r4i8 !---------------------- ! vec_sldw !---------------------- -! CHECK-LABEL: vec_sldw_test_i1i1 +! LLVM-LABEL: vec_sldw_test_i1i1 subroutine vec_sldw_test_i1i1(arg1, arg2) vector(integer(1)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -594,7 +594,7 @@ subroutine vec_sldw_test_i1i1(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i1i1 -! CHECK-LABEL: vec_sldw_test_i1i2 +! LLVM-LABEL: vec_sldw_test_i1i2 subroutine vec_sldw_test_i1i2(arg1, arg2) vector(integer(1)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -610,7 +610,7 @@ subroutine vec_sldw_test_i1i2(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i1i2 -! CHECK-LABEL: vec_sldw_test_i1i4 +! LLVM-LABEL: vec_sldw_test_i1i4 subroutine vec_sldw_test_i1i4(arg1, arg2) vector(integer(1)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -626,7 +626,7 @@ subroutine vec_sldw_test_i1i4(arg1, arg2) ! 
BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i1i4 -! CHECK-LABEL: vec_sldw_test_i1i8 +! LLVM-LABEL: vec_sldw_test_i1i8 subroutine vec_sldw_test_i1i8(arg1, arg2) vector(integer(1)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) @@ -642,7 +642,7 @@ subroutine vec_sldw_test_i1i8(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i1i8 -! CHECK-LABEL: vec_sldw_test_i2i1 +! LLVM-LABEL: vec_sldw_test_i2i1 subroutine vec_sldw_test_i2i1(arg1, arg2) vector(integer(2)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -664,7 +664,7 @@ subroutine vec_sldw_test_i2i1(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i2i1 -! CHECK-LABEL: vec_sldw_test_i2i2 +! LLVM-LABEL: vec_sldw_test_i2i2 subroutine vec_sldw_test_i2i2(arg1, arg2) vector(integer(2)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -686,7 +686,7 @@ subroutine vec_sldw_test_i2i2(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i2i2 -! CHECK-LABEL: vec_sldw_test_i2i4 +! LLVM-LABEL: vec_sldw_test_i2i4 subroutine vec_sldw_test_i2i4(arg1, arg2) vector(integer(2)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -708,7 +708,7 @@ subroutine vec_sldw_test_i2i4(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i2i4 -! CHECK-LABEL: vec_sldw_test_i2i8 +! LLVM-LABEL: vec_sldw_test_i2i8 subroutine vec_sldw_test_i2i8(arg1, arg2) vector(integer(2)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) @@ -730,7 +730,7 @@ subroutine vec_sldw_test_i2i8(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i2i8 -! CHECK-LABEL: vec_sldw_test_i4i1 +! LLVM-LABEL: vec_sldw_test_i4i1 subroutine vec_sldw_test_i4i1(arg1, arg2) vector(integer(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -752,7 +752,7 @@ subroutine vec_sldw_test_i4i1(arg1, arg2) ! 
BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i4i1 -! CHECK-LABEL: vec_sldw_test_i4i2 +! LLVM-LABEL: vec_sldw_test_i4i2 subroutine vec_sldw_test_i4i2(arg1, arg2) vector(integer(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -774,7 +774,7 @@ subroutine vec_sldw_test_i4i2(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i4i2 -! CHECK-LABEL: vec_sldw_test_i4i4 +! LLVM-LABEL: vec_sldw_test_i4i4 subroutine vec_sldw_test_i4i4(arg1, arg2) vector(integer(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -796,7 +796,7 @@ subroutine vec_sldw_test_i4i4(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i4i4 -! CHECK-LABEL: vec_sldw_test_i4i8 +! LLVM-LABEL: vec_sldw_test_i4i8 subroutine vec_sldw_test_i4i8(arg1, arg2) vector(integer(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) @@ -818,7 +818,7 @@ subroutine vec_sldw_test_i4i8(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i4i8 -! CHECK-LABEL: vec_sldw_test_i8i1 +! LLVM-LABEL: vec_sldw_test_i8i1 subroutine vec_sldw_test_i8i1(arg1, arg2) vector(integer(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -840,7 +840,7 @@ subroutine vec_sldw_test_i8i1(arg1, arg2) ! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i8i1 -! CHECK-LABEL: vec_sldw_test_i8i2 +! LLVM-LABEL: vec_sldw_test_i8i2 subroutine vec_sldw_test_i8i2(arg1, arg2) vector(integer(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -862,7 +862,7 @@ subroutine vec_sldw_test_i8i2(arg1, arg2) ! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i8i2 -! CHECK-LABEL: vec_sldw_test_i8i4 +! LLVM-LABEL: vec_sldw_test_i8i4 subroutine vec_sldw_test_i8i4(arg1, arg2) vector(integer(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -884,7 +884,7 @@ subroutine vec_sldw_test_i8i4(arg1, arg2) ! 
BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i8i4 -! CHECK-LABEL: vec_sldw_test_i8i8 +! LLVM-LABEL: vec_sldw_test_i8i8 subroutine vec_sldw_test_i8i8(arg1, arg2) vector(integer(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) @@ -907,7 +907,7 @@ subroutine vec_sldw_test_i8i8(arg1, arg2) end subroutine vec_sldw_test_i8i8 -! CHECK-LABEL: vec_sldw_test_u1i1 +! LLVM-LABEL: vec_sldw_test_u1i1 subroutine vec_sldw_test_u1i1(arg1, arg2) vector(unsigned(1)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -923,7 +923,7 @@ subroutine vec_sldw_test_u1i1(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u1i1 -! CHECK-LABEL: vec_sldw_test_u1i2 +! LLVM-LABEL: vec_sldw_test_u1i2 subroutine vec_sldw_test_u1i2(arg1, arg2) vector(unsigned(1)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -939,7 +939,7 @@ subroutine vec_sldw_test_u1i2(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u1i2 -! CHECK-LABEL: vec_sldw_test_u1i4 +! LLVM-LABEL: vec_sldw_test_u1i4 subroutine vec_sldw_test_u1i4(arg1, arg2) vector(unsigned(1)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -955,7 +955,7 @@ subroutine vec_sldw_test_u1i4(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u1i4 -! CHECK-LABEL: vec_sldw_test_u1i8 +! LLVM-LABEL: vec_sldw_test_u1i8 subroutine vec_sldw_test_u1i8(arg1, arg2) vector(unsigned(1)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) @@ -971,7 +971,7 @@ subroutine vec_sldw_test_u1i8(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u1i8 -! CHECK-LABEL: vec_sldw_test_u2i1 +! LLVM-LABEL: vec_sldw_test_u2i1 subroutine vec_sldw_test_u2i1(arg1, arg2) vector(unsigned(2)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -993,7 +993,7 @@ subroutine vec_sldw_test_u2i1(arg1, arg2) ! 
BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u2i1 -! CHECK-LABEL: vec_sldw_test_u2i2 +! LLVM-LABEL: vec_sldw_test_u2i2 subroutine vec_sldw_test_u2i2(arg1, arg2) vector(unsigned(2)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -1015,7 +1015,7 @@ subroutine vec_sldw_test_u2i2(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u2i2 -! CHECK-LABEL: vec_sldw_test_u2i4 +! LLVM-LABEL: vec_sldw_test_u2i4 subroutine vec_sldw_test_u2i4(arg1, arg2) vector(unsigned(2)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -1037,7 +1037,7 @@ subroutine vec_sldw_test_u2i4(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u2i4 -! CHECK-LABEL: vec_sldw_test_u2i8 +! LLVM-LABEL: vec_sldw_test_u2i8 subroutine vec_sldw_test_u2i8(arg1, arg2) vector(unsigned(2)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) @@ -1059,7 +1059,7 @@ subroutine vec_sldw_test_u2i8(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u2i8 -! CHECK-LABEL: vec_sldw_test_u4i1 +! LLVM-LABEL: vec_sldw_test_u4i1 subroutine vec_sldw_test_u4i1(arg1, arg2) vector(unsigned(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -1081,7 +1081,7 @@ subroutine vec_sldw_test_u4i1(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u4i1 -! CHECK-LABEL: vec_sldw_test_u4i2 +! LLVM-LABEL: vec_sldw_test_u4i2 subroutine vec_sldw_test_u4i2(arg1, arg2) vector(unsigned(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -1103,7 +1103,7 @@ subroutine vec_sldw_test_u4i2(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u4i2 -! CHECK-LABEL: vec_sldw_test_u4i4 +! 
LLVM-LABEL: vec_sldw_test_u4i4 subroutine vec_sldw_test_u4i4(arg1, arg2) vector(unsigned(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -1125,7 +1125,7 @@ subroutine vec_sldw_test_u4i4(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u4i4 -! CHECK-LABEL: vec_sldw_test_u4i8 +! LLVM-LABEL: vec_sldw_test_u4i8 subroutine vec_sldw_test_u4i8(arg1, arg2) vector(unsigned(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) @@ -1147,7 +1147,7 @@ subroutine vec_sldw_test_u4i8(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u4i8 -! CHECK-LABEL: vec_sldw_test_u8i1 +! LLVM-LABEL: vec_sldw_test_u8i1 subroutine vec_sldw_test_u8i1(arg1, arg2) vector(unsigned(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -1169,7 +1169,7 @@ subroutine vec_sldw_test_u8i1(arg1, arg2) ! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u8i1 -! CHECK-LABEL: vec_sldw_test_u8i2 +! LLVM-LABEL: vec_sldw_test_u8i2 subroutine vec_sldw_test_u8i2(arg1, arg2) vector(unsigned(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -1191,7 +1191,7 @@ subroutine vec_sldw_test_u8i2(arg1, arg2) ! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u8i2 -! CHECK-LABEL: vec_sldw_test_u8i4 +! LLVM-LABEL: vec_sldw_test_u8i4 subroutine vec_sldw_test_u8i4(arg1, arg2) vector(unsigned(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -1213,7 +1213,7 @@ subroutine vec_sldw_test_u8i4(arg1, arg2) ! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u8i4 -! CHECK-LABEL: vec_sldw_test_u8i8 +! LLVM-LABEL: vec_sldw_test_u8i8 subroutine vec_sldw_test_u8i8(arg1, arg2) vector(unsigned(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) @@ -1235,7 +1235,7 @@ subroutine vec_sldw_test_u8i8(arg1, arg2) ! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u8i8 -! 
CHECK-LABEL: vec_sldw_test_r4i1 +! LLVM-LABEL: vec_sldw_test_r4i1 subroutine vec_sldw_test_r4i1(arg1, arg2) vector(real(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -1257,7 +1257,7 @@ subroutine vec_sldw_test_r4i1(arg1, arg2) ! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_r4i1 -! CHECK-LABEL: vec_sldw_test_r4i2 +! LLVM-LABEL: vec_sldw_test_r4i2 subroutine vec_sldw_test_r4i2(arg1, arg2) vector(real(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -1279,7 +1279,7 @@ subroutine vec_sldw_test_r4i2(arg1, arg2) ! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_r4i2 -! CHECK-LABEL: vec_sldw_test_r4i4 +! LLVM-LABEL: vec_sldw_test_r4i4 subroutine vec_sldw_test_r4i4(arg1, arg2) vector(real(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -1301,7 +1301,7 @@ subroutine vec_sldw_test_r4i4(arg1, arg2) ! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_r4i4 -! CHECK-LABEL: vec_sldw_test_r4i8 +! LLVM-LABEL: vec_sldw_test_r4i8 subroutine vec_sldw_test_r4i8(arg1, arg2) vector(real(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) @@ -1323,7 +1323,7 @@ subroutine vec_sldw_test_r4i8(arg1, arg2) ! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_r4i8 -! CHECK-LABEL: vec_sldw_test_r8i1 +! LLVM-LABEL: vec_sldw_test_r8i1 subroutine vec_sldw_test_r8i1(arg1, arg2) vector(real(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -1345,7 +1345,7 @@ subroutine vec_sldw_test_r8i1(arg1, arg2) ! BE-LLVMIR: store <2 x double> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_r8i1 -! CHECK-LABEL: vec_sldw_test_r8i2 +! LLVM-LABEL: vec_sldw_test_r8i2 subroutine vec_sldw_test_r8i2(arg1, arg2) vector(real(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -1367,7 +1367,7 @@ subroutine vec_sldw_test_r8i2(arg1, arg2) ! 
BE-LLVMIR: store <2 x double> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_r8i2 -! CHECK-LABEL: vec_sldw_test_r8i4 +! LLVM-LABEL: vec_sldw_test_r8i4 subroutine vec_sldw_test_r8i4(arg1, arg2) vector(real(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -1389,7 +1389,7 @@ subroutine vec_sldw_test_r8i4(arg1, arg2) ! BE-LLVMIR: store <2 x double> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_r8i4 -! CHECK-LABEL: vec_sldw_test_r8i8 +! LLVM-LABEL: vec_sldw_test_r8i8 subroutine vec_sldw_test_r8i8(arg1, arg2) vector(real(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) diff --git a/libc/hdr/CMakeLists.txt b/libc/hdr/CMakeLists.txt index 91b8cb71552a7..66b82c84dac49 100644 --- a/libc/hdr/CMakeLists.txt +++ b/libc/hdr/CMakeLists.txt @@ -87,4 +87,14 @@ add_proxy_header_library( libc.include.llvm-libc-macros.time_macros ) +add_proxy_header_library( + float_macros + HDRS + float_macros.h + DEPENDS + libc.include.llvm-libc-macros.float_macros + FULL_BUILD_DEPENDS + libc.include.float +) + add_subdirectory(types) diff --git a/libc/hdr/float_macros.h b/libc/hdr/float_macros.h new file mode 100644 index 0000000000000..a0ef5e29b9868 --- /dev/null +++ b/libc/hdr/float_macros.h @@ -0,0 +1,22 @@ +//===-- Definition of macros from float.h ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_HDR_FLOAT_MACROS_H +#define LLVM_LIBC_HDR_FLOAT_MACROS_H + +#ifdef LIBC_FULL_BUILD + +#include "include/llvm-libc-macros/float-macros.h" + +#else // Overlay mode + +#include <float.h> + +#endif // LIBC_FULL_BUILD + +#endif // LLVM_LIBC_HDR_FLOAT_MACROS_H diff --git a/libc/include/llvm-libc-macros/float-macros.h b/libc/include/llvm-libc-macros/float-macros.h index 4fe8590c5f70c..81c1df868bf6c 100644 --- a/libc/include/llvm-libc-macros/float-macros.h +++ b/libc/include/llvm-libc-macros/float-macros.h @@ -9,21 +9,6 @@ #ifndef LLVM_LIBC_MACROS_FLOAT_MACROS_H #define LLVM_LIBC_MACROS_FLOAT_MACROS_H -// Suppress `#include_next is a language extension` warnings. -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wgnu-include-next" -#pragma clang diagnostic ignored "-Winclude-next-absolute-path" -#else // gcc -#pragma GCC system_header -#endif //__clang__ - -#include_next <float.h> - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif //__clang__ - #ifndef FLT_RADIX #define FLT_RADIX __FLT_RADIX__ #endif // FLT_RADIX @@ -32,9 +17,13 @@ #define FLT_EVAL_METHOD __FLT_EVAL_METHOD__ #endif // FLT_EVAL_METHOD -#ifndef DECIMAL_DIG -#define DECIMAL_DIG __DECIMAL_DIG__ -#endif // DECIMAL_DIG +#ifndef FLT_ROUNDS +#if __has_builtin(__builtin_flt_rounds) +#define FLT_ROUNDS __builtin_flt_rounds() +#else +#define FLT_ROUNDS 1 +#endif +#endif // FLT_ROUNDS #ifndef FLT_DECIMAL_DIG #define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__ @@ -48,6 +37,10 @@ #define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__ #endif // LDBL_DECIMAL_DIG +#ifndef DECIMAL_DIG +#define DECIMAL_DIG __DECIMAL_DIG__ +#endif // DECIMAL_DIG + #ifndef FLT_DIG #define FLT_DIG __FLT_DIG__ #endif // FLT_DIG @@ -97,15 +90,15 @@ #endif // LDBL_MAX #ifndef FLT_TRUE_MIN -#define FLT_TRUE_MIN __FLT_TRUE_MIN__ +#define FLT_TRUE_MIN __FLT_DENORM_MIN__ #endif
// FLT_TRUE_MIN #ifndef DBL_TRUE_MIN -#define DBL_TRUE_MIN __DBL_TRUE_MIN__ +#define DBL_TRUE_MIN __DBL_DENORM_MIN__ #endif // DBL_TRUE_MIN #ifndef LDBL_TRUE_MIN -#define LDBL_TRUE_MIN __LDBL_TRUE_MIN__ +#define LDBL_TRUE_MIN __LDBL_DENORM_MIN__ #endif // LDBL_TRUE_MIN #ifndef FLT_EPSILON diff --git a/libc/src/__support/macros/properties/CMakeLists.txt b/libc/src/__support/macros/properties/CMakeLists.txt index bbc45650f3fca..7718aeaa3de5a 100644 --- a/libc/src/__support/macros/properties/CMakeLists.txt +++ b/libc/src/__support/macros/properties/CMakeLists.txt @@ -33,6 +33,6 @@ add_header_library( .compiler .cpu_features .os - libc.include.llvm-libc-macros.float_macros + libc.hdr.float_macros libc.include.llvm-libc-types.float128 ) diff --git a/libc/src/__support/macros/properties/types.h b/libc/src/__support/macros/properties/types.h index d43cf99e6859b..781cf1b7a2b62 100644 --- a/libc/src/__support/macros/properties/types.h +++ b/libc/src/__support/macros/properties/types.h @@ -10,7 +10,7 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_MACROS_PROPERTIES_TYPES_H #define LLVM_LIBC_SRC___SUPPORT_MACROS_PROPERTIES_TYPES_H -#include "include/llvm-libc-macros/float-macros.h" // LDBL_MANT_DIG +#include "hdr/float_macros.h" // LDBL_MANT_DIG #include "include/llvm-libc-types/float128.h" // float128 #include "src/__support/macros/properties/architectures.h" #include "src/__support/macros/properties/compiler.h" diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt index 39c4ad20201ca..f6913ef083428 100644 --- a/libc/src/__support/threads/linux/CMakeLists.txt +++ b/libc/src/__support/threads/linux/CMakeLists.txt @@ -75,4 +75,5 @@ add_object_library( libc.src.__support.OSUtil.osutil libc.src.__support.threads.linux.futex_word_type libc.src.__support.threads.mutex + libc.src.__support.CPP.mutex ) diff --git a/libc/src/__support/threads/linux/CndVar.cpp b/libc/src/__support/threads/linux/CndVar.cpp index daf56bca1ed21..b3a0fdbda4e9e 
100644 --- a/libc/src/__support/threads/linux/CndVar.cpp +++ b/libc/src/__support/threads/linux/CndVar.cpp @@ -7,9 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/__support/threads/CndVar.h" +#include "src/__support/CPP/mutex.h" #include "src/__support/OSUtil/syscall.h" // syscall_impl #include "src/__support/threads/linux/futex_word.h" // FutexWordType -#include "src/__support/threads/mutex.h" // Mutex, MutexLock +#include "src/__support/threads/mutex.h" // Mutex #include <sys/syscall.h> // For syscall numbers. @@ -27,7 +28,7 @@ int CndVar::wait(Mutex *m) { CndWaiter waiter; { - MutexLock ml(&qmtx); + cpp::lock_guard ml(qmtx); CndWaiter *old_back = nullptr; if (waitq_front == nullptr) { waitq_front = waitq_back = &waiter; @@ -83,7 +84,7 @@ void CndVar::notify_one() { } void CndVar::broadcast() { - MutexLock ml(&qmtx); + cpp::lock_guard ml(qmtx); uint32_t dummy_futex_word; CndWaiter *waiter = waitq_front; waitq_front = waitq_back = nullptr; diff --git a/libc/src/__support/threads/mutex.h b/libc/src/__support/threads/mutex.h index 9dded2e3f952a..392b38984dc0a 100644 --- a/libc/src/__support/threads/mutex.h +++ b/libc/src/__support/threads/mutex.h @@ -43,18 +43,4 @@ #include "src/__support/threads/gpu/mutex.h" #endif // __linux__ -namespace LIBC_NAMESPACE { - -// An RAII class for easy locking and unlocking of mutexes.
-class MutexLock { - Mutex *mutex; - -public: - explicit MutexLock(Mutex *m) : mutex(m) { mutex->lock(); } - - ~MutexLock() { mutex->unlock(); } -}; - -} // namespace LIBC_NAMESPACE - #endif // LLVM_LIBC_SRC___SUPPORT_THREADS_MUTEX_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index daaf505008ca1..269bc6be5d834 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -2933,6 +2933,7 @@ add_entrypoint_object( HDRS ../scalbn.h DEPENDS + libc.hdr.float_macros libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS -O3 @@ -2945,6 +2946,7 @@ add_entrypoint_object( HDRS ../scalbnf.h DEPENDS + libc.hdr.float_macros libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS -O3 @@ -2957,6 +2959,7 @@ add_entrypoint_object( HDRS ../scalbnl.h DEPENDS + libc.hdr.float_macros libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS -O3 @@ -2969,6 +2972,7 @@ add_entrypoint_object( HDRS ../scalbnf128.h DEPENDS + libc.hdr.float_macros libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS diff --git a/libc/src/math/generic/scalbn.cpp b/libc/src/math/generic/scalbn.cpp index 3908f5892f144..207cce1550bc0 100644 --- a/libc/src/math/generic/scalbn.cpp +++ b/libc/src/math/generic/scalbn.cpp @@ -7,19 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/math/scalbn.h" +#include "hdr/float_macros.h" #include "src/__support/FPUtil/ManipulationFunctions.h" #include "src/__support/common.h" +#if FLT_RADIX != 2 +#error "FLT_RADIX != 2 is not supported." +#endif + namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(double, scalbn, (double x, int n)) { -#if !defined(__FLT_RADIX__) -#error __FLT_RADIX__ undefined. -#elif __FLT_RADIX__ != 2 -#error __FLT_RADIX__!=2, unimplemented. 
-#else return fputil::ldexp(x, n); -#endif } } // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/scalbnf.cpp b/libc/src/math/generic/scalbnf.cpp index 4a4fa86dcfd89..e478088d3ce5a 100644 --- a/libc/src/math/generic/scalbnf.cpp +++ b/libc/src/math/generic/scalbnf.cpp @@ -7,19 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/math/scalbnf.h" +#include "hdr/float_macros.h" #include "src/__support/FPUtil/ManipulationFunctions.h" #include "src/__support/common.h" +#if FLT_RADIX != 2 +#error "FLT_RADIX != 2 is not supported." +#endif + namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(float, scalbnf, (float x, int n)) { -#if !defined(__FLT_RADIX__) -#error __FLT_RADIX__ undefined. -#elif __FLT_RADIX__ != 2 -#error __FLT_RADIX__!=2, unimplemented. -#else return fputil::ldexp(x, n); -#endif } } // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/scalbnf128.cpp b/libc/src/math/generic/scalbnf128.cpp index be3d29ed27e98..5fd59611d53de 100644 --- a/libc/src/math/generic/scalbnf128.cpp +++ b/libc/src/math/generic/scalbnf128.cpp @@ -7,21 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/math/scalbnf128.h" +#include "hdr/float_macros.h" #include "src/__support/FPUtil/ManipulationFunctions.h" #include "src/__support/common.h" +#if FLT_RADIX != 2 +#error "FLT_RADIX != 2 is not supported." +#endif + namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(float128, scalbnf128, (float128 x, int n)) { -// TODO: should be switched to use `FLT_RADIX` in hdr/float_macros.h" instead -// see: https://github.com/llvm/llvm-project/issues/90496 -#if !defined(__FLT_RADIX__) -#error __FLT_RADIX__ undefined. -#elif __FLT_RADIX__ != 2 -#error __FLT_RADIX__!=2, unimplemented. 
-#else return fputil::ldexp(x, n); -#endif } } // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/scalbnl.cpp b/libc/src/math/generic/scalbnl.cpp index 681338ec01f07..1225a7ebaf572 100644 --- a/libc/src/math/generic/scalbnl.cpp +++ b/libc/src/math/generic/scalbnl.cpp @@ -7,19 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/math/scalbnl.h" +#include "hdr/float_macros.h" #include "src/__support/FPUtil/ManipulationFunctions.h" #include "src/__support/common.h" +#if FLT_RADIX != 2 +#error "FLT_RADIX != 2 is not supported." +#endif + namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(long double, scalbnl, (long double x, int n)) { -#if !defined(__FLT_RADIX__) -#error __FLT_RADIX__ undefined. -#elif __FLT_RADIX__ != 2 -#error __FLT_RADIX__!=2, unimplemented. -#else return fputil::ldexp(x, n); -#endif } } // namespace LIBC_NAMESPACE diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index cb5e0e5e6cdb5..a061fda88b5c6 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -122,7 +122,7 @@ option(LIBCXX_ENABLE_VENDOR_AVAILABILITY_ANNOTATIONS on definitions in a shared library. By default, we assume that we're not building libc++ for any specific vendor, and we disable those annotations. Vendors wishing to provide compile-time errors when using features unavailable on some version of - the shared library they shipped should turn this on and see `include/__availability` + the shared library they shipped should turn this on and see `include/__configuration/availability.h` for more details." 
OFF) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") @@ -856,15 +856,14 @@ endfunction() #=============================================================================== # Setup Source Code And Tests #=============================================================================== +add_custom_target(cxx-test-depends + COMMENT "Build dependencies required to run the libc++ test suite.") + add_subdirectory(include) add_subdirectory(src) add_subdirectory(utils) add_subdirectory(modules) -set(LIBCXX_TEST_DEPS "cxx_experimental") - -list(APPEND LIBCXX_TEST_DEPS generate-cxx-modules) - if (LIBCXX_INCLUDE_BENCHMARKS) add_subdirectory(benchmarks) endif() diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt index 93b549a316e38..2101f9c71788c 100644 --- a/libcxx/benchmarks/CMakeLists.txt +++ b/libcxx/benchmarks/CMakeLists.txt @@ -252,10 +252,6 @@ endforeach() if (LIBCXX_INCLUDE_TESTS) include(AddLLVM) - if (NOT DEFINED LIBCXX_TEST_DEPS) - message(FATAL_ERROR "Expected LIBCXX_TEST_DEPS to be defined") - endif() - configure_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py) @@ -265,6 +261,6 @@ if (LIBCXX_INCLUDE_TESTS) add_lit_target(check-cxx-benchmarks "Running libcxx benchmarks tests" ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS cxx-benchmarks ${LIBCXX_TEST_DEPS} + DEPENDS cxx-benchmarks cxx-test-depends ARGS ${BENCHMARK_LIT_ARGS}) endif() diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index 17d2da907692e..0297068785e8b 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -326,8 +326,6 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_expected`` ``202211L`` ---------------------------------------------------------- ----------------- - ``__cpp_lib_format_path`` *unimplemented* - ---------------------------------------------------------- ----------------- 
``__cpp_lib_format_ranges`` ``202207L`` ---------------------------------------------------------- ----------------- ``__cpp_lib_formatters`` *unimplemented* @@ -386,8 +384,6 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_string_resize_and_overwrite`` ``202110L`` ---------------------------------------------------------- ----------------- - ``__cpp_lib_to_string`` *unimplemented* - ---------------------------------------------------------- ----------------- ``__cpp_lib_to_underlying`` ``202102L`` ---------------------------------------------------------- ----------------- ``__cpp_lib_tuple_like`` *unimplemented* @@ -412,6 +408,8 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_default_template_type_for_algorithm_values`` *unimplemented* ---------------------------------------------------------- ----------------- + ``__cpp_lib_format_path`` *unimplemented* + ---------------------------------------------------------- ----------------- ``__cpp_lib_freestanding_algorithm`` *unimplemented* ---------------------------------------------------------- ----------------- ``__cpp_lib_freestanding_array`` *unimplemented* @@ -466,6 +464,8 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_to_chars`` *unimplemented* ---------------------------------------------------------- ----------------- + ``__cpp_lib_to_string`` *unimplemented* + ---------------------------------------------------------- ----------------- ``__cpp_lib_tuple_like`` *unimplemented* ========================================================== ================= diff --git a/libcxx/docs/Status/Cxx20Issues.csv b/libcxx/docs/Status/Cxx20Issues.csv index 5f83fa3a92e87..54517ab002b86 100644 --- a/libcxx/docs/Status/Cxx20Issues.csv +++ b/libcxx/docs/Status/Cxx20Issues.csv @@ -200,9 +200,9 @@ "`3200 `__","``midpoint``\ should not constrain ``T``\ is 
complete","Prague","|Nothing To Do|","" "`3201 `__","``lerp``\ should be marked as ``noexcept``\ ","Prague","|Complete|","" "`3226 `__","``zoned_time``\ constructor from ``string_view``\ should accept ``zoned_time``\ ","Prague","","","|chrono|" -"`3233 `__","Broken requirements for ``shared_ptr``\ converting constructors","Prague","","" +"`3233 `__","Broken requirements for ``shared_ptr``\ converting constructors","Prague","|Complete|","19.0" "`3237 `__","LWG 3038 and 3190 have inconsistent PRs","Prague","|Complete|","16.0" -"`3238 `__","Insufficiently-defined behavior of ``std::function``\ deduction guides","Prague","","" +"`3238 `__","Insufficiently-defined behavior of ``std::function``\ deduction guides","Prague","|Nothing To Do|","" "`3242 `__","``std::format``\ : missing rules for ``arg-id``\ in ``width``\ and ``precision``\ ","Prague","|Complete|","14.0","|format|" "`3243 `__","``std::format``\ and negative zeroes","Prague","|Complete|","14.0","|format|" "`3247 `__","``ranges::iter_move``\ should perform ADL-only lookup of ``iter_move``\ ","Prague","|Complete|","15.0","|ranges|" @@ -285,7 +285,7 @@ "`3379 `__","""``safe``\ "" in several library names is misleading","Prague","|Complete|","15.0","|ranges|" "`3380 `__","``common_type``\ and comparison categories","Prague","|Complete|","15.0","|spaceship|" "`3381 `__","``begin``\ and ``data``\ must agree for ``contiguous_range``\ ","Prague","|Nothing To Do|","","|ranges|" -"`3382 `__","NTTP for ``pair``\ and ``array``\ ","Prague","","" +"`3382 `__","NTTP for ``pair``\ and ``array``\ ","Prague","|Nothing To Do|","" "`3383 `__","|sect|\ [time.zone.leap.nonmembers] ``sys_seconds``\ should be replaced with ``seconds``\ ","Prague","|Complete|","19.0","|chrono|" "`3384 `__","``transform_view::*sentinel*``\ has an incorrect ``operator-``\ ","Prague","|Complete|","15.0","|ranges|" "`3385 `__","``common_iterator``\ is not sufficiently constrained for non-copyable iterators","Prague","|Complete|","15.0","|ranges|" diff 
--git a/libcxx/docs/Status/Cxx2cIssues.csv b/libcxx/docs/Status/Cxx2cIssues.csv index 76717e1d3448a..8d24457186310 100644 --- a/libcxx/docs/Status/Cxx2cIssues.csv +++ b/libcxx/docs/Status/Cxx2cIssues.csv @@ -29,7 +29,7 @@ "`3947 `__","Unexpected constraints on ``adjacent_transform_view::base()``","Kona November 2023","","","|ranges|" "`3948 `__","``possibly-const-range and as-const-pointer`` should be ``noexcept``","Kona November 2023","","","|ranges|" "`3949 `__","``std::atomic``'s trivial destructor dropped in C++17 spec wording","Kona November 2023","","","" -"`3951 `__","[expected.object.swap]: Using ``value()`` instead of ``has_value()``","Kona November 2023","","","" +"`3951 `__","[expected.object.swap]: Using ``value()`` instead of ``has_value()``","Kona November 2023","|Complete|","16.0","" "`3953 `__","``iter_move`` for ``common_iterator`` and ``counted_iterator`` should return ``decltype(auto)``","Kona November 2023","","","|ranges|" "`3957 `__","[container.alloc.reqmts] The value category of v should be claimed","Kona November 2023","","","" "`3965 `__","Incorrect example in [format.string.escaped] p3 for formatting of combining characters","Kona November 2023","|Complete|","19.0","|format|" diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 161d7a7d215bd..cfe1f44777bca 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -219,7 +219,6 @@ set(files __atomic/kill_dependency.h __atomic/memory_order.h __atomic/to_gcc_order.h - __availability __bit/bit_cast.h __bit/bit_ceil.h __bit/bit_floor.h @@ -315,7 +314,9 @@ set(files __condition_variable/condition_variable.h __config __configuration/abi.h + __configuration/availability.h __configuration/compiler.h + __configuration/language.h __configuration/platform.h __coroutine/coroutine_handle.h __coroutine/coroutine_traits.h diff --git a/libcxx/include/__atomic/atomic_base.h b/libcxx/include/__atomic/atomic_base.h index e9badccc25a62..d7a5b99b54691 100644 
--- a/libcxx/include/__atomic/atomic_base.h +++ b/libcxx/include/__atomic/atomic_base.h @@ -14,7 +14,6 @@ #include <__atomic/cxx_atomic_impl.h> #include <__atomic/is_always_lock_free.h> #include <__atomic/memory_order.h> -#include <__availability> #include <__config> #include <__memory/addressof.h> #include <__type_traits/is_integral.h> diff --git a/libcxx/include/__atomic/atomic_flag.h b/libcxx/include/__atomic/atomic_flag.h index 3ec3366ecaaf9..00b157cdff78b 100644 --- a/libcxx/include/__atomic/atomic_flag.h +++ b/libcxx/include/__atomic/atomic_flag.h @@ -13,7 +13,6 @@ #include <__atomic/contention_t.h> #include <__atomic/cxx_atomic_impl.h> #include <__atomic/memory_order.h> -#include <__availability> #include <__chrono/duration.h> #include <__config> #include <__memory/addressof.h> diff --git a/libcxx/include/__atomic/atomic_sync.h b/libcxx/include/__atomic/atomic_sync.h index 175700be54c01..1de5037329f81 100644 --- a/libcxx/include/__atomic/atomic_sync.h +++ b/libcxx/include/__atomic/atomic_sync.h @@ -13,7 +13,6 @@ #include <__atomic/cxx_atomic_impl.h> #include <__atomic/memory_order.h> #include <__atomic/to_gcc_order.h> -#include <__availability> #include <__chrono/duration.h> #include <__config> #include <__memory/addressof.h> diff --git a/libcxx/include/__charconv/to_chars_floating_point.h b/libcxx/include/__charconv/to_chars_floating_point.h index 08720e1078852..118f316b21a10 100644 --- a/libcxx/include/__charconv/to_chars_floating_point.h +++ b/libcxx/include/__charconv/to_chars_floating_point.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___CHARCONV_TO_CHARS_FLOATING_POINT_H #define _LIBCPP___CHARCONV_TO_CHARS_FLOATING_POINT_H -#include <__availability> #include <__charconv/chars_format.h> #include <__charconv/to_chars_result.h> #include <__config> diff --git a/libcxx/include/__chrono/file_clock.h b/libcxx/include/__chrono/file_clock.h index 7d25729fec013..4dd3f88ce5ba4 100644 --- a/libcxx/include/__chrono/file_clock.h +++ b/libcxx/include/__chrono/file_clock.h @@ 
-10,7 +10,6 @@ #ifndef _LIBCPP___CHRONO_FILE_CLOCK_H #define _LIBCPP___CHRONO_FILE_CLOCK_H -#include <__availability> #include <__chrono/duration.h> #include <__chrono/system_clock.h> #include <__chrono/time_point.h> diff --git a/libcxx/include/__chrono/tzdb_list.h b/libcxx/include/__chrono/tzdb_list.h index 62db7e3d2e0b5..aeef4fe1aba3c 100644 --- a/libcxx/include/__chrono/tzdb_list.h +++ b/libcxx/include/__chrono/tzdb_list.h @@ -16,7 +16,6 @@ // Enable the contents of the header only when libc++ was built with experimental features enabled. #if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) -# include <__availability> # include <__chrono/time_zone.h> # include <__chrono/tzdb.h> # include <__config> diff --git a/libcxx/include/__config b/libcxx/include/__config index e048dad52c466..79422e8f6c5d1 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -12,6 +12,7 @@ #include <__config_site> #include <__configuration/abi.h> +#include <__configuration/availability.h> #include <__configuration/compiler.h> #include <__configuration/platform.h> @@ -35,25 +36,6 @@ # define _LIBCPP_FREESTANDING # endif -// NOLINTBEGIN(libcpp-cpp-version-check) -# ifndef _LIBCPP_STD_VER -# if __cplusplus <= 201103L -# define _LIBCPP_STD_VER 11 -# elif __cplusplus <= 201402L -# define _LIBCPP_STD_VER 14 -# elif __cplusplus <= 201703L -# define _LIBCPP_STD_VER 17 -# elif __cplusplus <= 202002L -# define _LIBCPP_STD_VER 20 -# elif __cplusplus <= 202302L -# define _LIBCPP_STD_VER 23 -# else -// Expected release year of the next C++ standard -# define _LIBCPP_STD_VER 26 -# endif -# endif // _LIBCPP_STD_VER -// NOLINTEND(libcpp-cpp-version-check) - // HARDENING { // TODO(hardening): deprecate this in LLVM 19. 
@@ -364,10 +346,6 @@ typedef __char32_t char32_t; # endif -# if !defined(__cpp_exceptions) || __cpp_exceptions < 199711L -# define _LIBCPP_HAS_NO_EXCEPTIONS -# endif - # define _LIBCPP_PREFERRED_ALIGNOF(_Tp) __alignof(_Tp) # if defined(_LIBCPP_COMPILER_CLANG_BASED) @@ -840,11 +818,6 @@ typedef __char32_t char32_t; # define _LIBCPP_CONSTEXPR_SINCE_CXX23 # endif -// Try to find out if RTTI is disabled. -# if !defined(__cpp_rtti) || __cpp_rtti < 199711L -# define _LIBCPP_HAS_NO_RTTI -# endif - # ifndef _LIBCPP_WEAK # define _LIBCPP_WEAK __attribute__((__weak__)) # endif diff --git a/libcxx/include/__availability b/libcxx/include/__configuration/availability.h similarity index 98% rename from libcxx/include/__availability rename to libcxx/include/__configuration/availability.h index e44ac1962df36..1115431115ec3 100644 --- a/libcxx/include/__availability +++ b/libcxx/include/__configuration/availability.h @@ -7,10 +7,11 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___AVAILABILITY -#define _LIBCPP___AVAILABILITY +#ifndef _LIBCPP___CONFIGURATION_AVAILABILITY_H +#define _LIBCPP___CONFIGURATION_AVAILABILITY_H -#include <__config> +#include <__configuration/compiler.h> +#include <__configuration/language.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -374,4 +375,4 @@ # define _LIBCPP_AVAILABILITY_INIT_PRIMARY_EXCEPTION #endif -#endif // _LIBCPP___AVAILABILITY +#endif // _LIBCPP___CONFIGURATION_AVAILABILITY_H diff --git a/libcxx/include/__configuration/language.h b/libcxx/include/__configuration/language.h new file mode 100644 index 0000000000000..fa62a7b6f5c2a --- /dev/null +++ b/libcxx/include/__configuration/language.h @@ -0,0 +1,46 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___CONFIGURATION_LANGUAGE_H +#define _LIBCPP___CONFIGURATION_LANGUAGE_H + +#include <__config_site> + +#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER +# pragma GCC system_header +#endif + +// NOLINTBEGIN(libcpp-cpp-version-check) +#ifdef __cplusplus +# if __cplusplus <= 201103L +# define _LIBCPP_STD_VER 11 +# elif __cplusplus <= 201402L +# define _LIBCPP_STD_VER 14 +# elif __cplusplus <= 201703L +# define _LIBCPP_STD_VER 17 +# elif __cplusplus <= 202002L +# define _LIBCPP_STD_VER 20 +# elif __cplusplus <= 202302L +# define _LIBCPP_STD_VER 23 +# else +// Expected release year of the next C++ standard +# define _LIBCPP_STD_VER 26 +# endif +#endif // __cplusplus +// NOLINTEND(libcpp-cpp-version-check) + +#if !defined(__cpp_rtti) || __cpp_rtti < 199711L +# define _LIBCPP_HAS_NO_RTTI +#endif + +#if !defined(__cpp_exceptions) || __cpp_exceptions < 199711L +# define _LIBCPP_HAS_NO_EXCEPTIONS +#endif + +#endif // _LIBCPP___CONFIGURATION_LANGUAGE_H diff --git a/libcxx/include/__exception/exception_ptr.h b/libcxx/include/__exception/exception_ptr.h index 868fd7c015339..0a8337fa39de3 100644 --- a/libcxx/include/__exception/exception_ptr.h +++ b/libcxx/include/__exception/exception_ptr.h @@ -9,7 +9,6 @@ #ifndef _LIBCPP___EXCEPTION_EXCEPTION_PTR_H #define _LIBCPP___EXCEPTION_EXCEPTION_PTR_H -#include <__availability> #include <__config> #include <__exception/operations.h> #include <__memory/addressof.h> diff --git a/libcxx/include/__expected/bad_expected_access.h b/libcxx/include/__expected/bad_expected_access.h index ef29fa5088313..1b734389e8311 100644 --- a/libcxx/include/__expected/bad_expected_access.h +++ b/libcxx/include/__expected/bad_expected_access.h @@ -9,7 +9,6 @@ #ifndef _LIBCPP___EXPECTED_BAD_EXPECTED_ACCESS_H #define _LIBCPP___EXPECTED_BAD_EXPECTED_ACCESS_H -#include <__availability> #include <__config> 
#include <__exception/exception.h> #include <__utility/move.h> diff --git a/libcxx/include/__filesystem/directory_entry.h b/libcxx/include/__filesystem/directory_entry.h index 016ad94a853dc..96d88dcd90b4c 100644 --- a/libcxx/include/__filesystem/directory_entry.h +++ b/libcxx/include/__filesystem/directory_entry.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FILESYSTEM_DIRECTORY_ENTRY_H #define _LIBCPP___FILESYSTEM_DIRECTORY_ENTRY_H -#include <__availability> #include <__chrono/time_point.h> #include <__compare/ordering.h> #include <__config> diff --git a/libcxx/include/__filesystem/directory_iterator.h b/libcxx/include/__filesystem/directory_iterator.h index a5aa5ff5432da..e0246d8001e19 100644 --- a/libcxx/include/__filesystem/directory_iterator.h +++ b/libcxx/include/__filesystem/directory_iterator.h @@ -11,7 +11,6 @@ #define _LIBCPP___FILESYSTEM_DIRECTORY_ITERATOR_H #include <__assert> -#include <__availability> #include <__config> #include <__filesystem/directory_entry.h> #include <__filesystem/directory_options.h> diff --git a/libcxx/include/__filesystem/filesystem_error.h b/libcxx/include/__filesystem/filesystem_error.h index bfdcc5eaee521..80a11e3b1932c 100644 --- a/libcxx/include/__filesystem/filesystem_error.h +++ b/libcxx/include/__filesystem/filesystem_error.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FILESYSTEM_FILESYSTEM_ERROR_H #define _LIBCPP___FILESYSTEM_FILESYSTEM_ERROR_H -#include <__availability> #include <__config> #include <__filesystem/path.h> #include <__memory/shared_ptr.h> diff --git a/libcxx/include/__filesystem/operations.h b/libcxx/include/__filesystem/operations.h index 9bb83576f54bc..f588189ed1d9d 100644 --- a/libcxx/include/__filesystem/operations.h +++ b/libcxx/include/__filesystem/operations.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FILESYSTEM_OPERATIONS_H #define _LIBCPP___FILESYSTEM_OPERATIONS_H -#include <__availability> #include <__chrono/time_point.h> #include <__config> #include <__filesystem/copy_options.h> diff --git 
a/libcxx/include/__filesystem/path.h b/libcxx/include/__filesystem/path.h index 89d319b4b19b5..ff468d517722f 100644 --- a/libcxx/include/__filesystem/path.h +++ b/libcxx/include/__filesystem/path.h @@ -12,7 +12,6 @@ #include <__algorithm/replace.h> #include <__algorithm/replace_copy.h> -#include <__availability> #include <__config> #include <__functional/unary_function.h> #include <__fwd/functional.h> diff --git a/libcxx/include/__filesystem/path_iterator.h b/libcxx/include/__filesystem/path_iterator.h index d2d65cd122cab..f4d486d86cf38 100644 --- a/libcxx/include/__filesystem/path_iterator.h +++ b/libcxx/include/__filesystem/path_iterator.h @@ -11,7 +11,6 @@ #define _LIBCPP___FILESYSTEM_PATH_ITERATOR_H #include <__assert> -#include <__availability> #include <__config> #include <__filesystem/path.h> #include <__iterator/iterator_traits.h> diff --git a/libcxx/include/__filesystem/recursive_directory_iterator.h b/libcxx/include/__filesystem/recursive_directory_iterator.h index a8af4f73b14a5..caa1396eb301f 100644 --- a/libcxx/include/__filesystem/recursive_directory_iterator.h +++ b/libcxx/include/__filesystem/recursive_directory_iterator.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FILESYSTEM_RECURSIVE_DIRECTORY_ITERATOR_H #define _LIBCPP___FILESYSTEM_RECURSIVE_DIRECTORY_ITERATOR_H -#include <__availability> #include <__config> #include <__filesystem/directory_entry.h> #include <__filesystem/directory_options.h> diff --git a/libcxx/include/__filesystem/u8path.h b/libcxx/include/__filesystem/u8path.h index bde878054865e..dae5823128f02 100644 --- a/libcxx/include/__filesystem/u8path.h +++ b/libcxx/include/__filesystem/u8path.h @@ -11,7 +11,6 @@ #define _LIBCPP___FILESYSTEM_U8PATH_H #include <__algorithm/unwrap_iter.h> -#include <__availability> #include <__config> #include <__filesystem/path.h> #include diff --git a/libcxx/include/__functional/function.h b/libcxx/include/__functional/function.h index 36057706933d4..244e55be3403c 100644 --- 
a/libcxx/include/__functional/function.h +++ b/libcxx/include/__functional/function.h @@ -11,7 +11,6 @@ #define _LIBCPP___FUNCTIONAL_FUNCTION_H #include <__assert> -#include <__availability> #include <__config> #include <__exception/exception.h> #include <__functional/binary_function.h> diff --git a/libcxx/include/__fwd/memory_resource.h b/libcxx/include/__fwd/memory_resource.h index 03b78ad2bd3c0..d68b2c2b63154 100644 --- a/libcxx/include/__fwd/memory_resource.h +++ b/libcxx/include/__fwd/memory_resource.h @@ -9,7 +9,6 @@ #ifndef _LIBCPP___FWD_MEMORY_RESOURCE_H #define _LIBCPP___FWD_MEMORY_RESOURCE_H -#include <__availability> #include <__config> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__fwd/string.h b/libcxx/include/__fwd/string.h index 320c4e4c81836..2418e1f9b23d0 100644 --- a/libcxx/include/__fwd/string.h +++ b/libcxx/include/__fwd/string.h @@ -9,7 +9,6 @@ #ifndef _LIBCPP___FWD_STRING_H #define _LIBCPP___FWD_STRING_H -#include <__availability> #include <__config> #include <__fwd/memory.h> #include <__fwd/memory_resource.h> diff --git a/libcxx/include/__memory/shared_ptr.h b/libcxx/include/__memory/shared_ptr.h index 992b1ba43f100..de5707c4a67b0 100644 --- a/libcxx/include/__memory/shared_ptr.h +++ b/libcxx/include/__memory/shared_ptr.h @@ -403,6 +403,9 @@ struct __shared_ptr_deleter_ctor_reqs { __well_formed_deleter<_Dp, _Yp*>::value; }; +template +using __shared_ptr_nullptr_deleter_ctor_reqs = _And, __well_formed_deleter<_Dp, nullptr_t> >; + #if defined(_LIBCPP_ABI_ENABLE_SHARED_PTR_TRIVIAL_ABI) # define _LIBCPP_SHARED_PTR_TRIVIAL_ABI __attribute__((__trivial_abi__)) #else @@ -498,7 +501,7 @@ class _LIBCPP_SHARED_PTR_TRIVIAL_ABI _LIBCPP_TEMPLATE_VIS shared_ptr { #endif // _LIBCPP_HAS_NO_EXCEPTIONS } - template + template ::value, int> = 0 > _LIBCPP_HIDE_FROM_ABI shared_ptr(nullptr_t __p, _Dp __d) : __ptr_(nullptr) { #ifndef _LIBCPP_HAS_NO_EXCEPTIONS try { @@ -518,7 +521,7 @@ class _LIBCPP_SHARED_PTR_TRIVIAL_ABI 
_LIBCPP_TEMPLATE_VIS shared_ptr { #endif // _LIBCPP_HAS_NO_EXCEPTIONS } - template + template ::value, int> = 0 > _LIBCPP_HIDE_FROM_ABI shared_ptr(nullptr_t __p, _Dp __d, _Alloc __a) : __ptr_(nullptr) { #ifndef _LIBCPP_HAS_NO_EXCEPTIONS try { diff --git a/libcxx/include/__memory_resource/memory_resource.h b/libcxx/include/__memory_resource/memory_resource.h index e605838bf5ea4..ea85e50cd568b 100644 --- a/libcxx/include/__memory_resource/memory_resource.h +++ b/libcxx/include/__memory_resource/memory_resource.h @@ -9,7 +9,6 @@ #ifndef _LIBCPP___MEMORY_RESOURCE_MEMORY_RESOURCE_H #define _LIBCPP___MEMORY_RESOURCE_MEMORY_RESOURCE_H -#include <__availability> #include <__config> #include <__fwd/memory_resource.h> #include diff --git a/libcxx/include/__memory_resource/monotonic_buffer_resource.h b/libcxx/include/__memory_resource/monotonic_buffer_resource.h index 0c83f1ebc8db4..f45b30fdb3861 100644 --- a/libcxx/include/__memory_resource/monotonic_buffer_resource.h +++ b/libcxx/include/__memory_resource/monotonic_buffer_resource.h @@ -9,7 +9,6 @@ #ifndef _LIBCPP___MEMORY_RESOURCE_MONOTONIC_BUFFER_RESOURCE_H #define _LIBCPP___MEMORY_RESOURCE_MONOTONIC_BUFFER_RESOURCE_H -#include <__availability> #include <__config> #include <__memory/addressof.h> #include <__memory_resource/memory_resource.h> diff --git a/libcxx/include/__memory_resource/polymorphic_allocator.h b/libcxx/include/__memory_resource/polymorphic_allocator.h index 8fda201124387..a71096d3e4784 100644 --- a/libcxx/include/__memory_resource/polymorphic_allocator.h +++ b/libcxx/include/__memory_resource/polymorphic_allocator.h @@ -10,7 +10,6 @@ #define _LIBCPP___MEMORY_RESOURCE_POLYMORPHIC_ALLOCATOR_H #include <__assert> -#include <__availability> #include <__config> #include <__fwd/pair.h> #include <__memory_resource/memory_resource.h> diff --git a/libcxx/include/__memory_resource/synchronized_pool_resource.h b/libcxx/include/__memory_resource/synchronized_pool_resource.h index b261fb0b194a8..50a673c2861d1 100644 
--- a/libcxx/include/__memory_resource/synchronized_pool_resource.h +++ b/libcxx/include/__memory_resource/synchronized_pool_resource.h @@ -9,7 +9,6 @@ #ifndef _LIBCPP___MEMORY_RESOURCE_SYNCHRONIZED_POOL_RESOURCE_H #define _LIBCPP___MEMORY_RESOURCE_SYNCHRONIZED_POOL_RESOURCE_H -#include <__availability> #include <__config> #include <__memory_resource/memory_resource.h> #include <__memory_resource/pool_options.h> diff --git a/libcxx/include/__memory_resource/unsynchronized_pool_resource.h b/libcxx/include/__memory_resource/unsynchronized_pool_resource.h index 81d5f9ec4da87..783db84262af7 100644 --- a/libcxx/include/__memory_resource/unsynchronized_pool_resource.h +++ b/libcxx/include/__memory_resource/unsynchronized_pool_resource.h @@ -9,7 +9,6 @@ #ifndef _LIBCPP___MEMORY_RESOURCE_UNSYNCHRONIZED_POOL_RESOURCE_H #define _LIBCPP___MEMORY_RESOURCE_UNSYNCHRONIZED_POOL_RESOURCE_H -#include <__availability> #include <__config> #include <__memory_resource/memory_resource.h> #include <__memory_resource/pool_options.h> diff --git a/libcxx/include/__ostream/print.h b/libcxx/include/__ostream/print.h index 97680cdab6da3..8265ac00777e2 100644 --- a/libcxx/include/__ostream/print.h +++ b/libcxx/include/__ostream/print.h @@ -9,7 +9,6 @@ #ifndef _LIBCPP___OSTREAM_PRINT_H #define _LIBCPP___OSTREAM_PRINT_H -#include <__availability> #include <__config> #include <__fwd/ostream.h> #include <__iterator/ostreambuf_iterator.h> diff --git a/libcxx/include/__stop_token/stop_callback.h b/libcxx/include/__stop_token/stop_callback.h index 7b526820f98a3..760cf2bb55b0c 100644 --- a/libcxx/include/__stop_token/stop_callback.h +++ b/libcxx/include/__stop_token/stop_callback.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___STOP_TOKEN_STOP_CALLBACK_H #define _LIBCPP___STOP_TOKEN_STOP_CALLBACK_H -#include <__availability> #include <__concepts/constructible.h> #include <__concepts/destructible.h> #include <__concepts/invocable.h> diff --git a/libcxx/include/__stop_token/stop_source.h 
b/libcxx/include/__stop_token/stop_source.h index 1080069cf3b8b..70697462784ab 100644 --- a/libcxx/include/__stop_token/stop_source.h +++ b/libcxx/include/__stop_token/stop_source.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___STOP_TOKEN_STOP_SOURCE_H #define _LIBCPP___STOP_TOKEN_STOP_SOURCE_H -#include <__availability> #include <__config> #include <__stop_token/intrusive_shared_ptr.h> #include <__stop_token/stop_state.h> diff --git a/libcxx/include/__stop_token/stop_state.h b/libcxx/include/__stop_token/stop_state.h index df07573f87862..b0eed13a143cf 100644 --- a/libcxx/include/__stop_token/stop_state.h +++ b/libcxx/include/__stop_token/stop_state.h @@ -11,7 +11,6 @@ #define _LIBCPP___STOP_TOKEN_STOP_STATE_H #include <__assert> -#include <__availability> #include <__config> #include <__stop_token/atomic_unique_lock.h> #include <__stop_token/intrusive_list_view.h> diff --git a/libcxx/include/__stop_token/stop_token.h b/libcxx/include/__stop_token/stop_token.h index f2eadb990bdec..1bd75cbbf6f8d 100644 --- a/libcxx/include/__stop_token/stop_token.h +++ b/libcxx/include/__stop_token/stop_token.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___STOP_TOKEN_STOP_TOKEN_H #define _LIBCPP___STOP_TOKEN_STOP_TOKEN_H -#include <__availability> #include <__config> #include <__stop_token/intrusive_shared_ptr.h> #include <__stop_token/stop_state.h> diff --git a/libcxx/include/__thread/jthread.h b/libcxx/include/__thread/jthread.h index 253e3a935d9b7..b3d5c25fb71c7 100644 --- a/libcxx/include/__thread/jthread.h +++ b/libcxx/include/__thread/jthread.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___THREAD_JTHREAD_H #define _LIBCPP___THREAD_JTHREAD_H -#include <__availability> #include <__config> #include <__functional/invoke.h> #include <__stop_token/stop_source.h> diff --git a/libcxx/include/__thread/poll_with_backoff.h b/libcxx/include/__thread/poll_with_backoff.h index d8354e6ca2398..4f961fe3f7629 100644 --- a/libcxx/include/__thread/poll_with_backoff.h +++ b/libcxx/include/__thread/poll_with_backoff.h @@ -10,7 
+10,6 @@ #ifndef _LIBCPP___THREAD_POLL_WITH_BACKOFF_H #define _LIBCPP___THREAD_POLL_WITH_BACKOFF_H -#include <__availability> #include <__chrono/duration.h> #include <__chrono/high_resolution_clock.h> #include <__config> diff --git a/libcxx/include/__verbose_abort b/libcxx/include/__verbose_abort index 259c70dda8fe8..1e2265a6bf755 100644 --- a/libcxx/include/__verbose_abort +++ b/libcxx/include/__verbose_abort @@ -10,7 +10,6 @@ #ifndef _LIBCPP___VERBOSE_ABORT #define _LIBCPP___VERBOSE_ABORT -#include <__availability> #include <__config> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/barrier b/libcxx/include/barrier index a6b4d2288309e..bce67bb5d3425 100644 --- a/libcxx/include/barrier +++ b/libcxx/include/barrier @@ -54,7 +54,6 @@ namespace std #include <__assert> #include <__atomic/atomic_base.h> #include <__atomic/memory_order.h> -#include <__availability> #include <__memory/unique_ptr.h> #include <__thread/poll_with_backoff.h> #include <__thread/timed_backoff_policy.h> diff --git a/libcxx/include/condition_variable b/libcxx/include/condition_variable index 4ded1140d46b1..5195cd6057dd3 100644 --- a/libcxx/include/condition_variable +++ b/libcxx/include/condition_variable @@ -118,7 +118,6 @@ public: */ -#include <__availability> #include <__chrono/duration.h> #include <__chrono/steady_clock.h> #include <__chrono/time_point.h> diff --git a/libcxx/include/deque b/libcxx/include/deque index 3c33e04e9f05f..555761aae6afd 100644 --- a/libcxx/include/deque +++ b/libcxx/include/deque @@ -189,7 +189,6 @@ template #include <__algorithm/remove_if.h> #include <__algorithm/unwrap_iter.h> #include <__assert> -#include <__availability> #include <__config> #include <__debug_utils/sanitizers.h> #include <__format/enable_insertable.h> diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list index 80dd49fe3d75a..363931e3f2388 100644 --- a/libcxx/include/forward_list +++ b/libcxx/include/forward_list @@ -199,7 +199,6 @@ template #include 
<__algorithm/lexicographical_compare.h> #include <__algorithm/lexicographical_compare_three_way.h> #include <__algorithm/min.h> -#include <__availability> #include <__config> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> diff --git a/libcxx/include/fstream b/libcxx/include/fstream index 7128f72e16119..18f4dd3eed0b2 100644 --- a/libcxx/include/fstream +++ b/libcxx/include/fstream @@ -188,7 +188,6 @@ typedef basic_fstream wfstream; #include <__algorithm/max.h> #include <__assert> -#include <__availability> #include <__config> #include <__fwd/fstream.h> #include <__locale> diff --git a/libcxx/include/latch b/libcxx/include/latch index 1937617f7dcc6..da8dae149c79f 100644 --- a/libcxx/include/latch +++ b/libcxx/include/latch @@ -50,7 +50,6 @@ namespace std #include <__atomic/atomic_base.h> #include <__atomic/atomic_sync.h> #include <__atomic/memory_order.h> -#include <__availability> #include #include #include diff --git a/libcxx/include/list b/libcxx/include/list index 610a24e384600..87f15e144ac8f 100644 --- a/libcxx/include/list +++ b/libcxx/include/list @@ -203,7 +203,6 @@ template #include <__algorithm/lexicographical_compare_three_way.h> #include <__algorithm/min.h> #include <__assert> -#include <__availability> #include <__config> #include <__format/enable_insertable.h> #include <__iterator/distance.h> diff --git a/libcxx/include/map b/libcxx/include/map index 1d1c062a0267c..7efa715e84aa7 100644 --- a/libcxx/include/map +++ b/libcxx/include/map @@ -575,7 +575,6 @@ erase_if(multimap& c, Predicate pred); // C++20 #include <__algorithm/lexicographical_compare.h> #include <__algorithm/lexicographical_compare_three_way.h> #include <__assert> -#include <__availability> #include <__config> #include <__functional/binary_function.h> #include <__functional/is_transparent.h> diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 1f7c2a183f63d..48391b2a12095 100644 --- a/libcxx/include/module.modulemap +++ 
b/libcxx/include/module.modulemap @@ -570,10 +570,6 @@ module std_private_assert [system] { header "__assert" export * } -module std_private_availability [system] { - header "__availability" - export * -} module std_private_bit_reference [system] { header "__bit_reference" export * @@ -584,7 +580,9 @@ module std_private_fwd_bit_reference [system] { module std_private_config [system] { textual header "__config" textual header "__configuration/abi.h" + textual header "__configuration/availability.h" textual header "__configuration/compiler.h" + textual header "__configuration/language.h" textual header "__configuration/platform.h" export * } diff --git a/libcxx/include/optional b/libcxx/include/optional index a16e48502e250..622e150f7a9f7 100644 --- a/libcxx/include/optional +++ b/libcxx/include/optional @@ -178,7 +178,6 @@ namespace std { */ #include <__assert> -#include <__availability> #include <__compare/compare_three_way_result.h> #include <__compare/three_way_comparable.h> #include <__concepts/invocable.h> diff --git a/libcxx/include/print b/libcxx/include/print index e0bcf214ea239..5bdaa559af724 100644 --- a/libcxx/include/print +++ b/libcxx/include/print @@ -34,7 +34,6 @@ namespace std { */ #include <__assert> -#include <__availability> #include <__concepts/same_as.h> #include <__config> #include <__system_error/system_error.h> diff --git a/libcxx/include/regex b/libcxx/include/regex index ce9f34260254a..b3869d36de1df 100644 --- a/libcxx/include/regex +++ b/libcxx/include/regex @@ -792,7 +792,6 @@ typedef regex_token_iterator wsregex_token_iterator; #include <__algorithm/find.h> #include <__algorithm/search.h> #include <__assert> -#include <__availability> #include <__config> #include <__iterator/back_insert_iterator.h> #include <__iterator/default_sentinel.h> diff --git a/libcxx/include/semaphore b/libcxx/include/semaphore index cb2f42c106ca8..8d3b04475c092 100644 --- a/libcxx/include/semaphore +++ b/libcxx/include/semaphore @@ -55,7 +55,6 @@ using 
binary_semaphore = counting_semaphore<1>; #include <__atomic/atomic_base.h> #include <__atomic/atomic_sync.h> #include <__atomic/memory_order.h> -#include <__availability> #include <__chrono/time_point.h> #include <__thread/poll_with_backoff.h> #include <__thread/support.h> diff --git a/libcxx/include/set b/libcxx/include/set index d9377ee6c3322..ab3a4363499af 100644 --- a/libcxx/include/set +++ b/libcxx/include/set @@ -516,7 +516,6 @@ erase_if(multiset& c, Predicate pred); // C++20 #include <__algorithm/lexicographical_compare.h> #include <__algorithm/lexicographical_compare_three_way.h> #include <__assert> -#include <__availability> #include <__config> #include <__functional/is_transparent.h> #include <__functional/operations.h> diff --git a/libcxx/include/sstream b/libcxx/include/sstream index 5009fe5c0057b..9ba43ffeb850f 100644 --- a/libcxx/include/sstream +++ b/libcxx/include/sstream @@ -312,7 +312,6 @@ typedef basic_stringstream wstringstream; // clang-format on -#include <__availability> #include <__config> #include <__fwd/sstream.h> #include <__ostream/basic_ostream.h> diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map index c838cd96b1123..2e25b0f050695 100644 --- a/libcxx/include/unordered_map +++ b/libcxx/include/unordered_map @@ -585,7 +585,6 @@ template #include <__algorithm/is_permutation.h> #include <__assert> -#include <__availability> #include <__config> #include <__functional/is_transparent.h> #include <__functional/operations.h> diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set index 5de1458beb1e6..c966cc8eb4df1 100644 --- a/libcxx/include/unordered_set +++ b/libcxx/include/unordered_set @@ -533,7 +533,6 @@ template #include <__algorithm/is_permutation.h> #include <__assert> -#include <__availability> #include <__config> #include <__functional/is_transparent.h> #include <__functional/operations.h> diff --git a/libcxx/include/variant b/libcxx/include/variant index 631ffceab5f68..7ebd0534b1641 100644 --- 
a/libcxx/include/variant +++ b/libcxx/include/variant @@ -212,7 +212,6 @@ namespace std { */ -#include <__availability> #include <__compare/common_comparison_category.h> #include <__compare/compare_three_way_result.h> #include <__compare/three_way_comparable.h> diff --git a/libcxx/include/vector b/libcxx/include/vector index b190557fb7b7e..cbfc2cefa1fd9 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -316,7 +316,6 @@ template requires is-vector-bool-reference // Since C++ #include <__algorithm/rotate.h> #include <__algorithm/unwrap_iter.h> #include <__assert> -#include <__availability> #include <__bit_reference> #include <__concepts/same_as.h> #include <__config> diff --git a/libcxx/include/version b/libcxx/include/version index 69556d731f1cf..d433e1b1c9cea 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -255,7 +255,6 @@ __cpp_lib_void_t 201411L */ -#include <__availability> #include <__config> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -459,7 +458,6 @@ __cpp_lib_void_t 201411L # define __cpp_lib_constexpr_typeinfo 202106L # define __cpp_lib_containers_ranges 202202L # define __cpp_lib_expected 202211L -// # define __cpp_lib_format_path 202403L # define __cpp_lib_format_ranges 202207L // # define __cpp_lib_formatters 202302L # define __cpp_lib_forward_like 202207L @@ -490,7 +488,6 @@ __cpp_lib_void_t 201411L # define __cpp_lib_stdatomic_h 202011L # define __cpp_lib_string_contains 202011L # define __cpp_lib_string_resize_and_overwrite 202110L -// # define __cpp_lib_to_string 202306L # define __cpp_lib_to_underlying 202102L // # define __cpp_lib_tuple_like 202207L # define __cpp_lib_unreachable 202202L @@ -506,6 +503,7 @@ __cpp_lib_void_t 201411L // # define __cpp_lib_copyable_function 202306L // # define __cpp_lib_debugging 202311L // # define __cpp_lib_default_template_type_for_algorithm_values 202403L +// # define __cpp_lib_format_path 202403L // # define __cpp_lib_freestanding_algorithm 202311L // # define 
__cpp_lib_freestanding_array 202311L // # define __cpp_lib_freestanding_cstring 202306L @@ -537,6 +535,7 @@ __cpp_lib_void_t 201411L // # define __cpp_lib_text_encoding 202306L # undef __cpp_lib_to_chars // # define __cpp_lib_to_chars 202306L +// # define __cpp_lib_to_string 202306L # undef __cpp_lib_tuple_like // # define __cpp_lib_tuple_like 202311L #endif diff --git a/libcxx/modules/CMakeLists.txt b/libcxx/modules/CMakeLists.txt index d47d19a475531..82cd7b66beb7a 100644 --- a/libcxx/modules/CMakeLists.txt +++ b/libcxx/modules/CMakeLists.txt @@ -202,6 +202,7 @@ add_custom_target(generate-cxx-modules ALL DEPENDS ${_all_modules} ) +add_dependencies(cxx-test-depends generate-cxx-modules) # Configure the modules manifest. # Use the relative path between the installation and the module in the json diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index 8b28d1b891895..65e6ce2c4da43 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -322,6 +322,7 @@ endif() # Add a meta-target for both libraries. 
add_custom_target(cxx DEPENDS ${LIBCXX_BUILD_TARGETS}) +add_dependencies(cxx-test-depends cxx) set(LIBCXX_EXPERIMENTAL_SOURCES experimental/keep.cpp @@ -366,6 +367,7 @@ set_target_properties(cxx_experimental ) cxx_add_common_build_flags(cxx_experimental) target_compile_options(cxx_experimental PUBLIC -D_LIBCPP_ENABLE_EXPERIMENTAL) +add_dependencies(cxx-test-depends cxx_experimental) if (LIBCXX_INSTALL_SHARED_LIBRARY) install(TARGETS cxx_shared diff --git a/libcxx/src/optional.cpp b/libcxx/src/optional.cpp index 6ba63f2d89f5a..62b474a312be2 100644 --- a/libcxx/src/optional.cpp +++ b/libcxx/src/optional.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <__availability> #include #include diff --git a/libcxx/src/ostream.cpp b/libcxx/src/ostream.cpp index 443dce9a390be..e1a9a4bc1de71 100644 --- a/libcxx/src/ostream.cpp +++ b/libcxx/src/ostream.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include <__availability> #include <__config> #ifndef _LIBCPP_HAS_NO_FILESYSTEM # include diff --git a/libcxx/test/CMakeLists.txt b/libcxx/test/CMakeLists.txt index fd57aa9fe8b37..3c54a4edccff3 100644 --- a/libcxx/test/CMakeLists.txt +++ b/libcxx/test/CMakeLists.txt @@ -1,11 +1,5 @@ include(HandleLitArguments) add_subdirectory(tools) -# When the tools add clang-tidy support, the dependencies need to be updated. -# This cannot be done in the tools CMakeLists.txt since that does not update -# the status in this (a parent) directory. -if(TARGET cxx-tidy) - list(APPEND LIBCXX_TEST_DEPS cxx-tidy) -endif() # By default, libcxx and libcxxabi share a library directory. 
if (NOT LIBCXX_CXX_ABI_LIBRARY_PATH) @@ -16,6 +10,8 @@ endif() set(AUTO_GEN_COMMENT "## Autogenerated by libcxx configuration.\n# Do not edit!") set(SERIALIZED_LIT_PARAMS "# Lit parameters serialized here for llvm-lit to pick them up\n") +serialize_lit_string_param(SERIALIZED_LIT_PARAMS compiler "${CMAKE_CXX_COMPILER}") + if (NOT LIBCXX_ENABLE_EXCEPTIONS) serialize_lit_param(SERIALIZED_LIT_PARAMS enable_exceptions False) endif() @@ -38,10 +34,6 @@ endif() serialize_lit_params_list(SERIALIZED_LIT_PARAMS LIBCXX_TEST_PARAMS) -if (NOT DEFINED LIBCXX_TEST_DEPS) - message(FATAL_ERROR "Expected LIBCXX_TEST_DEPS to be defined") -endif() - if (MSVC) # Shared code for initializing some parameters used by all # llvm-libc++-*-clangcl.cfg.in test configs. @@ -79,10 +71,6 @@ if (LIBCXX_INCLUDE_TESTS) ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg MAIN_CONFIG "${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py") - add_custom_target(cxx-test-depends - DEPENDS cxx ${LIBCXX_TEST_DEPS} - COMMENT "Builds dependencies required to run the test suite.") - add_lit_testsuite(check-cxx "Running libcxx tests" ${CMAKE_CURRENT_BINARY_DIR} diff --git a/libcxx/test/configs/cmake-bridge.cfg.in b/libcxx/test/configs/cmake-bridge.cfg.in index 84b3270a8940a..78d0cb5a25748 100644 --- a/libcxx/test/configs/cmake-bridge.cfg.in +++ b/libcxx/test/configs/cmake-bridge.cfg.in @@ -23,8 +23,6 @@ config.recursiveExpansionLimit = 10 config.test_exec_root = os.path.join('@CMAKE_BINARY_DIR@', 'test') # Add substitutions for bootstrapping the test suite configuration -import shlex -config.substitutions.append(('%{cxx}', shlex.quote('@CMAKE_CXX_COMPILER@'))) config.substitutions.append(('%{libcxx-dir}', '@LIBCXX_SOURCE_DIR@')) config.substitutions.append(('%{include-dir}', '@LIBCXX_GENERATED_INCLUDE_DIR@')) config.substitutions.append(('%{target-include-dir}', '@LIBCXX_GENERATED_INCLUDE_TARGET_DIR@')) diff --git a/libcxx/test/libcxx/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp 
b/libcxx/test/libcxx/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp index 3ee213358f352..08c682964c374 100644 --- a/libcxx/test/libcxx/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp +++ b/libcxx/test/libcxx/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp @@ -73,7 +73,7 @@ L link link_to_link TEST_VALIDATE_EXCEPTION( std::runtime_error, [&]([[maybe_unused]] const std::runtime_error& e) { - std::string_view what{"tzdb: requested time zone not found"}; + [[maybe_unused]] std::string_view what{"tzdb: requested time zone not found"}; TEST_LIBCPP_REQUIRE( e.what() == what, TEST_WRITE_CONCATENATED("\nExpected exception ", what, "\nActual exception ", e.what(), '\n')); diff --git a/libcxx/test/libcxx/vendor/apple/availability-with-pedantic-errors.compile.pass.cpp b/libcxx/test/libcxx/vendor/apple/availability-with-pedantic-errors.compile.pass.cpp index c55a0a4d6e5d1..60723bf7b6e97 100644 --- a/libcxx/test/libcxx/vendor/apple/availability-with-pedantic-errors.compile.pass.cpp +++ b/libcxx/test/libcxx/vendor/apple/availability-with-pedantic-errors.compile.pass.cpp @@ -15,7 +15,7 @@ // ADDITIONAL_COMPILE_FLAGS: -pedantic-errors -#include <__availability> +#include <__config> #if defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) # error Availability annotations should be enabled on Apple platforms in the system configuration! 
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.contains/ranges.contains_subrange.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.contains/ranges.contains_subrange.pass.cpp index 761691c2afdcb..890ac23fff832 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.contains/ranges.contains_subrange.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.contains/ranges.contains_subrange.pass.cpp @@ -24,6 +24,7 @@ // Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++23 #include +#include #include #include #include @@ -130,10 +131,10 @@ constexpr void test_iterators() { } { // range has zero length - int a[] = {}; - int p[] = {3, 4, 2}; - auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(a))); - auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(std::end(p)))); + std::array a = {}; + int p[] = {3, 4, 2}; + auto whole = std::ranges::subrange(Iter1(a.data()), Sent1(Iter1(a.data()))); + auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(std::end(p)))); { bool ret = std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end()); assert(!ret); @@ -145,10 +146,10 @@ constexpr void test_iterators() { } { // subrange has zero length - int a[] = {3, 4, 2}; - int p[] = {}; - auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(std::end(a)))); - auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(p))); + int a[] = {3, 4, 2}; + std::array p = {}; + auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(std::end(a)))); + auto subrange = std::ranges::subrange(Iter2(p.data()), Sent2(Iter2(p.data()))); { bool ret = std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end()); assert(ret); @@ -160,10 +161,10 @@ constexpr void test_iterators() { } { // range and subrange both have zero length - int a[] = {}; - int p[] = {}; - auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(a))); - auto subrange = std::ranges::subrange(Iter2(p), 
Sent2(Iter2(p))); + std::array a = {}; + std::array p = {}; + auto whole = std::ranges::subrange(Iter1(a.data()), Sent1(Iter1(a.data()))); + auto subrange = std::ranges::subrange(Iter2(p.data()), Sent2(Iter2(p.data()))); { bool ret = std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end()); assert(ret); diff --git a/libcxx/test/std/atomics/atomics.ref/compare_exchange_strong.pass.cpp b/libcxx/test/std/atomics/atomics.ref/compare_exchange_strong.pass.cpp index 72b2f444c476c..90aa5ea5b6df4 100644 --- a/libcxx/test/std/atomics/atomics.ref/compare_exchange_strong.pass.cpp +++ b/libcxx/test/std/atomics/atomics.ref/compare_exchange_strong.pass.cpp @@ -9,6 +9,9 @@ // XFAIL: !has-64-bit-atomics // XFAIL: !has-1024-bit-atomics +// MSVC warning C4310: cast truncates constant value +// ADDITIONAL_COMPILE_FLAGS(cl-style-warnings): /wd4310 + // bool compare_exchange_strong(T&, T, memory_order, memory_order) const noexcept; // bool compare_exchange_strong(T&, T, memory_order = memory_order::seq_cst) const noexcept; diff --git a/libcxx/test/std/atomics/atomics.ref/compare_exchange_weak.pass.cpp b/libcxx/test/std/atomics/atomics.ref/compare_exchange_weak.pass.cpp index 5219a8e3714f9..99c1385a2fe0b 100644 --- a/libcxx/test/std/atomics/atomics.ref/compare_exchange_weak.pass.cpp +++ b/libcxx/test/std/atomics/atomics.ref/compare_exchange_weak.pass.cpp @@ -9,6 +9,9 @@ // XFAIL: !has-64-bit-atomics // XFAIL: !has-1024-bit-atomics +// MSVC warning C4310: cast truncates constant value +// ADDITIONAL_COMPILE_FLAGS(cl-style-warnings): /wd4310 + // bool compare_exchange_weak(T&, T, memory_order, memory_order) const noexcept; // bool compare_exchange_weak(T&, T, memory_order = memory_order::seq_cst) const noexcept; diff --git a/libcxx/test/std/atomics/atomics.ref/wait.pass.cpp b/libcxx/test/std/atomics/atomics.ref/wait.pass.cpp index e5310febf5c5e..f246803ba2592 100644 --- a/libcxx/test/std/atomics/atomics.ref/wait.pass.cpp +++ 
b/libcxx/test/std/atomics/atomics.ref/wait.pass.cpp @@ -11,6 +11,9 @@ // XFAIL: !has-64-bit-atomics // XFAIL: !has-1024-bit-atomics +// MSVC warning C4310: cast truncates constant value +// ADDITIONAL_COMPILE_FLAGS(cl-style-warnings): /wd4310 + // void wait(T, memory_order = memory_order::seq_cst) const noexcept; #include diff --git a/libcxx/test/std/containers/sequences/array/lwg3382.compile.pass.cpp b/libcxx/test/std/containers/sequences/array/lwg3382.compile.pass.cpp new file mode 100644 index 0000000000000..8eed20990cc00 --- /dev/null +++ b/libcxx/test/std/containers/sequences/array/lwg3382.compile.pass.cpp @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// +// UNSUPPORTED: c++03, c++11, c++14, c++17 + +#include + +template +struct Test {}; + +void test() { + // LWG 3382. 
NTTP for pair and array + // https://cplusplus.github.io/LWG/issue3382 + constexpr std::array a{}; + [[maybe_unused]] Test test1{}; + + constexpr std::array b{}; + [[maybe_unused]] Test test2{}; +} diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/destory_elements.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/destroy_elements.pass.cpp similarity index 100% rename from libcxx/test/std/containers/sequences/vector/vector.modifiers/destory_elements.pass.cpp rename to libcxx/test/std/containers/sequences/vector/vector.modifiers/destroy_elements.pass.cpp diff --git a/libcxx/test/std/containers/views/views.span/span.cons/initializer_list.pass.cpp b/libcxx/test/std/containers/views/views.span/span.cons/initializer_list.pass.cpp index 74a5094f61261..bc76e23fea3c0 100644 --- a/libcxx/test/std/containers/views/views.span/span.cons/initializer_list.pass.cpp +++ b/libcxx/test/std/containers/views/views.span/span.cons/initializer_list.pass.cpp @@ -93,9 +93,9 @@ constexpr bool test() { // Test P2447R4 "Annex C examples" -constexpr int three(std::span sp) { return sp.size(); } +constexpr int three(std::span sp) { return static_cast(sp.size()); } -constexpr int four(std::span sp) { return sp.size(); } +constexpr int four(std::span sp) { return static_cast(sp.size()); } bool test_P2447R4_annex_c_examples() { // 1. 
Overload resolution is affected diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp index d4bbde75ae882..7283fdc769d86 100644 --- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp +++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp @@ -50,13 +50,16 @@ int main(int, char**) // responds with an empty message, which we probably want to // treat as a failure code otherwise, but we can detect that // with the preprocessor. +#if defined(_NEWLIB_VERSION) + const bool is_newlib = true; +#else + const bool is_newlib = false; +#endif + (void)is_newlib; LIBCPP_ASSERT(msg.rfind("Error -1 occurred", 0) == 0 // AIX || msg.rfind("No error information", 0) == 0 // Musl || msg.rfind("Unknown error", 0) == 0 // Glibc -#if defined(_NEWLIB_VERSION) - || msg.empty() -#endif - ); + || (is_newlib && msg.empty())); assert(errno == E2BIG); } diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp index eefbddd27a7f5..02a1baf599983 100644 --- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp +++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp @@ -56,13 +56,16 @@ int main(int, char**) { // responds with an empty message, which we probably want to // treat as a failure code otherwise, but we can detect that // with the preprocessor. 
+#if defined(_NEWLIB_VERSION) + const bool is_newlib = true; +#else + const bool is_newlib = false; +#endif + (void)is_newlib; LIBCPP_ASSERT(msg.rfind("Error -1 occurred", 0) == 0 // AIX || msg.rfind("No error information", 0) == 0 // Musl || msg.rfind("Unknown error", 0) == 0 // Glibc -#if defined(_NEWLIB_VERSION) - || msg.empty() -#endif - ); + || (is_newlib && msg.empty())); assert(errno == E2BIG); } diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp index 5edf22eaacf31..d6bb56d9b78b7 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp @@ -37,7 +37,7 @@ constexpr bool test_non_convert_to_path() { static_assert(!std::is_constructible_v>); // Char* pointers - if constexpr (!std::is_same_v) + if constexpr (!std::is_same_v && !std::is_same_v) static_assert(!std::is_constructible_v); // Iterators diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp index 2f27fd8e6e93d..792b65615679a 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp @@ -38,7 +38,7 @@ constexpr bool test_non_convert_to_path() { static_assert(!std::is_constructible_v>); // Char* pointers - if constexpr (!std::is_same_v) + if constexpr (!std::is_same_v && !std::is_same_v) static_assert(!std::is_constructible_v); // Iterators diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp index e55adfd83fc3c..602bdadd85813 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp +++ 
b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp @@ -37,7 +37,7 @@ constexpr bool test_non_convert_to_path() { static_assert(!std::is_constructible_v>); // Char* pointers - if constexpr (!std::is_same_v) + if constexpr (!std::is_same_v && !std::is_same_v) static_assert(!std::is_constructible_v); // Iterators diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/sized_sentinel.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/sized_sentinel.compile.pass.cpp index cb49086dd6802..998b13ed49455 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/sized_sentinel.compile.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/sized_sentinel.compile.pass.cpp @@ -21,6 +21,7 @@ struct unsized_it { using difference_type = std::ptrdiff_t; value_type& operator*() const; + unsized_it& operator++(); bool operator==(const unsized_it&) const; difference_type operator-(const unsized_it&) const { return 0; } }; diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.compile.pass.cpp index 308cc2d43b058..4aba33482f69c 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.compile.pass.cpp @@ -20,7 +20,7 @@ /* Constant Value __cpp_lib_char8_t 201907L [C++20] __cpp_lib_filesystem 201703L [C++17] - __cpp_lib_format_path 202403L [C++23] + __cpp_lib_format_path 202403L [C++26] */ #include @@ -37,7 +37,7 @@ # endif # ifdef __cpp_lib_format_path -# error "__cpp_lib_format_path should not be defined before c++23" +# error "__cpp_lib_format_path should not be defined before c++26" # endif #elif TEST_STD_VER == 14 @@ -51,7 +51,7 @@ # endif # ifdef __cpp_lib_format_path -# 
error "__cpp_lib_format_path should not be defined before c++23" +# error "__cpp_lib_format_path should not be defined before c++26" # endif #elif TEST_STD_VER == 17 @@ -74,7 +74,7 @@ # endif # ifdef __cpp_lib_format_path -# error "__cpp_lib_format_path should not be defined before c++23" +# error "__cpp_lib_format_path should not be defined before c++26" # endif #elif TEST_STD_VER == 20 @@ -106,7 +106,7 @@ # endif # ifdef __cpp_lib_format_path -# error "__cpp_lib_format_path should not be defined before c++23" +# error "__cpp_lib_format_path should not be defined before c++26" # endif #elif TEST_STD_VER == 23 @@ -137,17 +137,8 @@ # endif # endif -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_format_path -# error "__cpp_lib_format_path should be defined in c++23" -# endif -# if __cpp_lib_format_path != 202403L -# error "__cpp_lib_format_path should have the value 202403L in c++23" -# endif -# else // _LIBCPP_VERSION -# ifdef __cpp_lib_format_path -# error "__cpp_lib_format_path should not be defined because it is unimplemented in libc++!" 
-# endif +# ifdef __cpp_lib_format_path +# error "__cpp_lib_format_path should not be defined before c++26" # endif #elif TEST_STD_VER > 23 diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/string.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/string.version.compile.pass.cpp index 16a9a0a28de63..af6386a40a458 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/string.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/string.version.compile.pass.cpp @@ -29,7 +29,7 @@ __cpp_lib_string_udls 201304L [C++14] __cpp_lib_string_view 201606L [C++17] 201803L [C++20] - __cpp_lib_to_string 202306L [C++23] + __cpp_lib_to_string 202306L [C++26] */ #include @@ -86,7 +86,7 @@ # endif # ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined before c++23" +# error "__cpp_lib_to_string should not be defined before c++26" # endif #elif TEST_STD_VER == 14 @@ -143,7 +143,7 @@ # endif # ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined before c++23" +# error "__cpp_lib_to_string should not be defined before c++26" # endif #elif TEST_STD_VER == 17 @@ -209,7 +209,7 @@ # endif # ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined before c++23" +# error "__cpp_lib_to_string should not be defined before c++26" # endif #elif TEST_STD_VER == 20 @@ -293,7 +293,7 @@ # endif # ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined before c++23" +# error "__cpp_lib_to_string should not be defined before c++26" # endif #elif TEST_STD_VER == 23 @@ -385,17 +385,8 @@ # error "__cpp_lib_string_view should have the value 201803L in c++23" # endif -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_to_string -# error "__cpp_lib_to_string should be defined in c++23" -# endif -# if __cpp_lib_to_string != 202306L -# error "__cpp_lib_to_string 
should have the value 202306L in c++23" -# endif -# else // _LIBCPP_VERSION -# ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined because it is unimplemented in libc++!" -# endif +# ifdef __cpp_lib_to_string +# error "__cpp_lib_to_string should not be defined before c++26" # endif #elif TEST_STD_VER > 23 diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp index 7829e06f90760..c1e1f9f340af4 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp @@ -88,7 +88,7 @@ __cpp_lib_expected 202211L [C++23] __cpp_lib_filesystem 201703L [C++17] __cpp_lib_format 202106L [C++20] - __cpp_lib_format_path 202403L [C++23] + __cpp_lib_format_path 202403L [C++26] __cpp_lib_format_ranges 202207L [C++23] __cpp_lib_format_uchar 202311L [C++20] __cpp_lib_formatters 202302L [C++23] @@ -216,7 +216,7 @@ __cpp_lib_to_array 201907L [C++20] __cpp_lib_to_chars 201611L [C++17] 202306L [C++26] - __cpp_lib_to_string 202306L [C++23] + __cpp_lib_to_string 202306L [C++26] __cpp_lib_to_underlying 202102L [C++23] __cpp_lib_transformation_trait_aliases 201304L [C++14] __cpp_lib_transparent_operators 201210L [C++14] @@ -513,7 +513,7 @@ # endif # ifdef __cpp_lib_format_path -# error "__cpp_lib_format_path should not be defined before c++23" +# error "__cpp_lib_format_path should not be defined before c++26" # endif # ifdef __cpp_lib_format_ranges @@ -1005,7 +1005,7 @@ # endif # ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined before c++23" +# error "__cpp_lib_to_string should not be defined before c++26" # endif # ifdef __cpp_lib_to_underlying @@ -1348,7 +1348,7 @@ # endif # ifdef __cpp_lib_format_path -# error 
"__cpp_lib_format_path should not be defined before c++23" +# error "__cpp_lib_format_path should not be defined before c++26" # endif # ifdef __cpp_lib_format_ranges @@ -1891,7 +1891,7 @@ # endif # ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined before c++23" +# error "__cpp_lib_to_string should not be defined before c++26" # endif # ifdef __cpp_lib_to_underlying @@ -2303,7 +2303,7 @@ # endif # ifdef __cpp_lib_format_path -# error "__cpp_lib_format_path should not be defined before c++23" +# error "__cpp_lib_format_path should not be defined before c++26" # endif # ifdef __cpp_lib_format_ranges @@ -2972,7 +2972,7 @@ # endif # ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined before c++23" +# error "__cpp_lib_to_string should not be defined before c++26" # endif # ifdef __cpp_lib_to_underlying @@ -3543,7 +3543,7 @@ # endif # ifdef __cpp_lib_format_path -# error "__cpp_lib_format_path should not be defined before c++23" +# error "__cpp_lib_format_path should not be defined before c++26" # endif # ifdef __cpp_lib_format_ranges @@ -4350,7 +4350,7 @@ # endif # ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined before c++23" +# error "__cpp_lib_to_string should not be defined before c++26" # endif # ifdef __cpp_lib_to_underlying @@ -4971,17 +4971,8 @@ # endif # endif -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_format_path -# error "__cpp_lib_format_path should be defined in c++23" -# endif -# if __cpp_lib_format_path != 202403L -# error "__cpp_lib_format_path should have the value 202403L in c++23" -# endif -# else // _LIBCPP_VERSION -# ifdef __cpp_lib_format_path -# error "__cpp_lib_format_path should not be defined because it is unimplemented in libc++!" 
-# endif +# ifdef __cpp_lib_format_path +# error "__cpp_lib_format_path should not be defined before c++26" # endif # ifndef __cpp_lib_format_ranges @@ -5943,17 +5934,8 @@ # endif # endif -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_to_string -# error "__cpp_lib_to_string should be defined in c++23" -# endif -# if __cpp_lib_to_string != 202306L -# error "__cpp_lib_to_string should have the value 202306L in c++23" -# endif -# else // _LIBCPP_VERSION -# ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined because it is unimplemented in libc++!" -# endif +# ifdef __cpp_lib_to_string +# error "__cpp_lib_to_string should not be defined before c++26" # endif # ifndef __cpp_lib_to_underlying diff --git a/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp b/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp index 212804356a056..6a9ec1a2ffec2 100644 --- a/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp +++ b/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -57,22 +58,26 @@ T basic_gcd_(T m, T n) { template T basic_gcd(T m, T n) { using Tp = std::make_unsigned_t; - if (m < 0 && m != std::numeric_limits::min()) - m = -m; - if (n < 0 && n != std::numeric_limits::min()) - n = -n; + if constexpr (std::is_signed_v) { + if (m < 0 && m != std::numeric_limits::min()) + m = -m; + if (n < 0 && n != std::numeric_limits::min()) + n = -n; + } return basic_gcd_(static_cast(m), static_cast(n)); } template void do_fuzzy_tests() { std::mt19937 gen(1938); - std::uniform_int_distribution distrib; + using DistIntType = std::conditional_t; // See N4981 [rand.req.genl]/1.5 + constexpr Input max_input = std::numeric_limits::max(); + std::uniform_int_distribution distrib(0, max_input); constexpr int nb_rounds = 10000; for (int i = 0; i < nb_rounds; ++i) { - Input n = distrib(gen); - Input m = distrib(gen); + Input n = 
static_cast(distrib(gen)); + Input m = static_cast(distrib(gen)); assert(std::gcd(n, m) == basic_gcd(n, m)); } } diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/current_zone.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/current_zone.pass.cpp index 2c43e121613c7..f31a679dd6214 100644 --- a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/current_zone.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/current_zone.pass.cpp @@ -32,7 +32,7 @@ static void set_tz(std::string zone) { // Unlike POSIX it does not mention the string of putenv becomes part // of the environment. - int status = _putenv_s("TZ", zone.c_str(), 1); + int status = _putenv_s("TZ", zone.c_str()); assert(status == 0); } diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/locate_zone.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/locate_zone.pass.cpp index 4d600fcdf40e3..8dd895fd21814 100644 --- a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/locate_zone.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/locate_zone.pass.cpp @@ -40,7 +40,7 @@ static void test_exception([[maybe_unused]] std::string_view zone) { TEST_VALIDATE_EXCEPTION( std::runtime_error, [&]([[maybe_unused]] const std::runtime_error& e) { - std::string_view what{"tzdb: requested time zone not found"}; + [[maybe_unused]] std::string_view what{"tzdb: requested time zone not found"}; TEST_LIBCPP_REQUIRE( e.what() == what, TEST_WRITE_CONCATENATED("\nExpected exception ", what, "\nActual exception ", e.what(), '\n')); diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/current_zone.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/current_zone.pass.cpp index e6497e26323ce..98509c298ebcb 100644 --- a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/current_zone.pass.cpp +++ 
b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/current_zone.pass.cpp @@ -34,7 +34,7 @@ static void set_tz(std::string zone) { // Unlike POSIX it does not mention the string of putenv becomes part // of the environment. - int status = _putenv_s("TZ", zone.c_str(), 1); + int status = _putenv_s("TZ", zone.c_str()); assert(status == 0); } diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp index f929dafcc9683..08ce48dfd0edb 100644 --- a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp @@ -42,7 +42,7 @@ static void test_exception([[maybe_unused]] std::string_view zone) { TEST_VALIDATE_EXCEPTION( std::runtime_error, [&]([[maybe_unused]] const std::runtime_error& e) { - std::string_view what{"tzdb: requested time zone not found"}; + [[maybe_unused]] std::string_view what{"tzdb: requested time zone not found"}; TEST_LIBCPP_REQUIRE( e.what() == what, TEST_WRITE_CONCATENATED("\nExpected exception ", what, "\nActual exception ", e.what(), '\n')); diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/deduct_F.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/deduct_F.pass.cpp index ef43ab9b64b5b..381bcda761700 100644 --- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/deduct_F.pass.cpp +++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/deduct_F.pass.cpp @@ -118,10 +118,14 @@ int main(int, char**) { // Make sure we fail in a SFINAE-friendly manner when we try to deduce // from a type without a valid call operator. 
template ()})> -constexpr bool can_deduce() { return true; } +constexpr bool can_deduce_test(int) { return true; } template -constexpr bool can_deduce(...) { return false; } +constexpr bool can_deduce_test(...) { return false; } +template +constexpr bool can_deduce = can_deduce_test(0); + +struct valid { int operator()() const; }; struct invalid1 { }; struct invalid2 { template @@ -131,6 +135,22 @@ struct invalid3 { void operator()(int); void operator()(long); }; -static_assert(!can_deduce()); -static_assert(!can_deduce()); -static_assert(!can_deduce()); +static_assert( can_deduce); +static_assert(!can_deduce); +static_assert(!can_deduce); +static_assert(!can_deduce); + + +// LWG 3238. Insufficiently-defined behavior of std::function deduction guides +// https://cplusplus.github.io/LWG/issue3238 +// The deduction guides for std::function do not handle rvalue-ref qualified +// call operators and C-style variadics. It also doesn't deduce from nullptr_t. +// Make sure we stick to the specification. + +struct invalid_rvalue_ref { R operator()() && { return {}; } }; +struct invalid_c_vararg { R operator()(int, ...) { return {}; } }; + +static_assert(!can_deduce); +static_assert(!can_deduce); +static_assert(!can_deduce); + diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/deduct_F.verify.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/deduct_F.verify.cpp deleted file mode 100644 index 8a42d3be3571c..0000000000000 --- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/deduct_F.verify.cpp +++ /dev/null @@ -1,30 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// template -// function(F) -> function; - -// UNSUPPORTED: c++03, c++11, c++14 - -// The deduction guides for std::function do not handle rvalue-ref qualified -// call operators and C-style variadics. It also doesn't deduce from nullptr_t. -// Make sure we stick to the specification. - -#include - -struct R { }; -struct f0 { R operator()() && { return {}; } }; -struct f1 { R operator()(int, ...) { return {}; } }; - -void f() { - std::function f = f0{}; // expected-error{{no viable constructor or deduction guide for deduction of template arguments of 'function'}} - std::function g = f1{}; // expected-error{{no viable constructor or deduction guide for deduction of template arguments of 'function'}} - std::function h = nullptr; // expected-error{{no viable constructor or deduction guide for deduction of template arguments of 'function'}} -} diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp index 49497b6956b9f..13340ed5294c0 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp @@ -17,6 +17,7 @@ #include "test_macros.h" #include "deleter_types.h" +#include "types.h" struct A { static int count; @@ -28,6 +29,25 @@ struct A int A::count = 0; +// LWG 3233. 
Broken requirements for shared_ptr converting constructors +// https://cplusplus.github.io/LWG/issue3233 +static_assert( std::is_constructible, std::nullptr_t, test_deleter >::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, bad_deleter>::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter>::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter>::value, ""); + +#if TEST_STD_VER >= 17 +static_assert( std::is_constructible, std::nullptr_t, test_deleter >::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, bad_deleter>::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter>::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter>::value, ""); + +static_assert( std::is_constructible, std::nullptr_t, test_deleter >::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, bad_deleter>::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter>::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter>::value, ""); +#endif + int main(int, char**) { { diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp index 4e9fc227b99e8..53ca6fb5b234d 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp @@ -17,6 +17,8 @@ #include "test_allocator.h" #include "min_allocator.h" +#include "types.h" + struct A { static int count; @@ -28,6 +30,25 @@ struct A int A::count = 0; +// LWG 3233. 
Broken requirements for shared_ptr converting constructors +// https://cplusplus.github.io/LWG/issue3233 +static_assert( std::is_constructible, std::nullptr_t, test_deleter, test_allocator >::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, bad_deleter, test_allocator >::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter, test_allocator >::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter, test_allocator >::value, ""); + +#if TEST_STD_VER >= 17 +static_assert( std::is_constructible, std::nullptr_t, test_deleter, test_allocator >::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, bad_deleter, test_allocator >::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter, test_allocator >::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter, test_allocator >::value, ""); + +static_assert( std::is_constructible, std::nullptr_t, test_deleter, test_allocator >::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, bad_deleter, test_allocator >::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter, test_allocator >::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter, test_allocator >::value, ""); +#endif + int main(int, char**) { test_allocator_statistics alloc_stats; diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp index 42225a4b0be7e..9c1e9b72be573 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp @@ -17,6 +17,8 @@ #include 
"test_macros.h" #include "deleter_types.h" +#include "types.h" + struct A { static int count; @@ -28,38 +30,8 @@ struct A int A::count = 0; -struct bad_ty { }; - -struct bad_deleter -{ - void operator()(bad_ty) { } -}; - -struct no_move_deleter -{ - no_move_deleter(no_move_deleter const&) = delete; - no_move_deleter(no_move_deleter &&) = delete; - void operator()(int*) { } -}; - -static_assert(!std::is_move_constructible::value, ""); - -struct Base { }; -struct Derived : Base { }; - -template -class MoveDeleter -{ - MoveDeleter(); - MoveDeleter(MoveDeleter const&); -public: - MoveDeleter(MoveDeleter&&) {} - - explicit MoveDeleter(int) {} - - void operator()(T* ptr) { delete ptr; } -}; - +// LWG 3233. Broken requirements for shared_ptr converting constructors +// https://cplusplus.github.io/LWG/issue3233 // https://llvm.org/PR60258 // Invalid constructor SFINAE for std::shared_ptr's array ctors static_assert( std::is_constructible, int*, test_deleter >::value, ""); @@ -68,12 +40,12 @@ static_assert( std::is_constructible, Derived*, test_dele static_assert(!std::is_constructible, int*, test_deleter >::value, ""); #if TEST_STD_VER >= 17 -static_assert( std::is_constructible, int*, test_deleter>::value, ""); +static_assert( std::is_constructible, int*, test_deleter >::value, ""); static_assert(!std::is_constructible, int*, bad_deleter>::value, ""); -static_assert(!std::is_constructible, int(*)[], test_deleter>::value, ""); -static_assert( std::is_constructible, int*, test_deleter>::value, ""); +static_assert(!std::is_constructible, int(*)[], test_deleter >::value, ""); +static_assert( std::is_constructible, int*, test_deleter >::value, ""); static_assert(!std::is_constructible, int*, bad_deleter>::value, ""); -static_assert(!std::is_constructible, int(*)[5], test_deleter>::value, ""); +static_assert(!std::is_constructible, int(*)[5], test_deleter >::value, ""); #endif int main(int, char**) diff --git 
a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp index a110525b9b922..9dffbcdd59a73 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp @@ -17,6 +17,7 @@ #include "test_allocator.h" #include "min_allocator.h" +#include "types.h" struct A { static int count; @@ -28,38 +29,8 @@ struct A int A::count = 0; -struct bad_ty { }; - -struct bad_deleter -{ - void operator()(bad_ty) { } -}; - -struct no_move_deleter -{ - no_move_deleter(no_move_deleter const&) = delete; - no_move_deleter(no_move_deleter &&) = delete; - void operator()(int*) { } -}; - -static_assert(!std::is_move_constructible::value, ""); - -struct Base { }; -struct Derived : Base { }; - -template -class MoveDeleter -{ - MoveDeleter(); - MoveDeleter(MoveDeleter const&); -public: - MoveDeleter(MoveDeleter&&) {} - - explicit MoveDeleter(int) {} - - void operator()(T* ptr) { delete ptr; } -}; - +// LWG 3233. 
Broken requirements for shared_ptr converting constructors +// https://cplusplus.github.io/LWG/issue3233 // https://llvm.org/PR60258 // Invalid constructor SFINAE for std::shared_ptr's array ctors static_assert( std::is_constructible, int*, test_deleter, test_allocator >::value, ""); @@ -68,12 +39,12 @@ static_assert( std::is_constructible, Derived*, test_dele static_assert(!std::is_constructible, int*, test_deleter, test_allocator >::value, ""); #if TEST_STD_VER >= 17 -static_assert( std::is_constructible, int*, test_deleter, test_allocator>::value, ""); -static_assert(!std::is_constructible, int*, bad_deleter, test_allocator>::value, ""); -static_assert(!std::is_constructible, int(*)[], test_deleter, test_allocator>::value, ""); -static_assert( std::is_constructible, int*, test_deleter, test_allocator>::value, ""); -static_assert(!std::is_constructible, int*, bad_deleter, test_allocator>::value, ""); -static_assert(!std::is_constructible, int(*)[5], test_deleter, test_allocator>::value, ""); +static_assert( std::is_constructible, int*, test_deleter, test_allocator >::value, ""); +static_assert(!std::is_constructible, int*, bad_deleter, test_allocator >::value, ""); +static_assert(!std::is_constructible, int(*)[], test_deleter, test_allocator >::value, ""); +static_assert( std::is_constructible, int*, test_deleter, test_allocator >::value, ""); +static_assert(!std::is_constructible, int*, bad_deleter, test_allocator >::value, ""); +static_assert(!std::is_constructible, int(*)[5], test_deleter, test_allocator >::value, ""); #endif diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/types.h b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/types.h new file mode 100644 index 0000000000000..5bfb3d70febea --- /dev/null +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/types.h @@ -0,0 +1,49 @@ 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef TEST_STD_UTILITIES_MEMORY_UTIL_SMARTPTR_SHARED_CONST_TYPES_H +#define TEST_STD_UTILITIES_MEMORY_UTIL_SMARTPTR_SHARED_CONST_TYPES_H + +#include + +struct bad_ty {}; + +struct bad_deleter { + void operator()(bad_ty) {} +}; + +struct no_move_deleter { + no_move_deleter(no_move_deleter const&) = delete; + no_move_deleter(no_move_deleter&&) = delete; + void operator()(int*) {} +}; + +static_assert(!std::is_move_constructible::value, ""); + +struct no_nullptr_deleter { + void operator()(int*) const {} + void operator()(std::nullptr_t) const = delete; +}; + +struct Base {}; +struct Derived : Base {}; + +template +class MoveDeleter { + MoveDeleter(); + MoveDeleter(MoveDeleter const&); + +public: + MoveDeleter(MoveDeleter&&) {} + + explicit MoveDeleter(int) {} + + void operator()(T* ptr) { delete ptr; } +}; + +#endif // TEST_STD_UTILITIES_MEMORY_UTIL_SMARTPTR_SHARED_CONST_TYPES_H diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.pass.cpp index d7374351afa8b..accb601dd0036 100644 --- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.pass.cpp +++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.pass.cpp @@ -209,6 +209,7 @@ template static constexpr bool can_make_from_tuple = std::is_same_v(T{}, Tuple{})), uint8_t>; +#ifdef _LIBCPP_VERSION template auto test_make_from_tuple_impl(T&&, Tuple&& t) -> decltype(std::__make_from_tuple_impl( @@ -224,6 +225,7 @@ uint32_t test_make_from_tuple_impl(...) 
{ template static constexpr bool can_make_from_tuple_impl = std::is_same_v(T{}, Tuple{})), uint8_t>; +#endif // _LIBCPP_VERSION struct A { int a; @@ -263,23 +265,23 @@ static_assert(can_make_from_tuple>); // Test std::__make_from_tuple_impl constraints. // reinterpret_cast -static_assert(!can_make_from_tuple_impl>); -static_assert(can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(!can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(can_make_from_tuple_impl>); // const_cast -static_assert(!can_make_from_tuple_impl>); -static_assert(!can_make_from_tuple_impl>); -static_assert(can_make_from_tuple_impl>); -static_assert(can_make_from_tuple_impl>); -static_assert(can_make_from_tuple_impl>); -static_assert(can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(!can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(!can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(can_make_from_tuple_impl>); // static_cast -static_assert(!can_make_from_tuple_impl>); -static_assert(!can_make_from_tuple_impl>); -static_assert(can_make_from_tuple_impl>); -static_assert(can_make_from_tuple_impl>); -static_assert(can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(!can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(!can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(can_make_from_tuple_impl>); } // namespace LWG3528 diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/lwg3382.compile.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/lwg3382.compile.pass.cpp new file mode 100644 index 0000000000000..dce9a5df220b2 --- /dev/null +++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/lwg3382.compile.pass.cpp @@ -0,0 +1,23 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, 
under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 + +#include + +template +struct Test {}; + +void test() { + // LWG 3382. NTTP for pair and array + // https://cplusplus.github.io/LWG/issue3382 +#if !defined(_LIBCPP_DEPRECATED_ABI_DISABLE_PAIR_TRIVIAL_COPY_CTOR) + constexpr std::pair a{}; + [[maybe_unused]] Test test1{}; +#endif +} diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp index db05691c55818..039a2373348c4 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp @@ -516,7 +516,7 @@ constexpr void test_swap_sfinae() { } } -_LIBCPP_CONSTEXPR_SINCE_CXX20 void test_swap_noexcept() { +TEST_CONSTEXPR_CXX20 void test_swap_noexcept() { { using V = std::variant; static_assert(std::is_swappable_v && has_swap_member(), ""); diff --git a/libcxx/test/support/msvc_stdlib_force_include.h b/libcxx/test/support/msvc_stdlib_force_include.h index 6c26085e72c45..785670224c3b1 100644 --- a/libcxx/test/support/msvc_stdlib_force_include.h +++ b/libcxx/test/support/msvc_stdlib_force_include.h @@ -67,7 +67,6 @@ const AssertionDialogAvoider assertion_dialog_avoider{}; // Silence compiler warnings. 
# pragma warning(disable : 4180) // qualifier applied to function type has no meaning; ignored # pragma warning(disable : 4324) // structure was padded due to alignment specifier -# pragma warning(disable : 4521) // multiple copy constructors specified # pragma warning(disable : 4702) // unreachable code # pragma warning(disable : 28251) // Inconsistent annotation for 'new': this instance has no annotations. #endif // !defined(__clang__) @@ -91,7 +90,7 @@ const AssertionDialogAvoider assertion_dialog_avoider{}; #include #if _HAS_CXX23 -# define TEST_STD_VER 99 +# define TEST_STD_VER 23 #elif _HAS_CXX20 # define TEST_STD_VER 20 #elif _HAS_CXX17 diff --git a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt index 28c1dbf8aca3c..f0289dc44c662 100644 --- a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt +++ b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt @@ -110,3 +110,5 @@ set_target_properties(cxx-tidy PROPERTIES set_target_properties(cxx-tidy PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) set(CMAKE_SHARED_MODULE_SUFFIX_CXX .plugin) # Use a portable suffix to simplify how we can find it from Lit + +add_dependencies(cxx-test-depends cxx-tidy) diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index b04cb4f511554..490ecefc97522 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -61,7 +61,8 @@ def add_version_header(tc): # just libc++. It may depend on # * macros defined by the compiler itself, or # * macros generated by CMake. -# In some cases we add also depend on macros defined in <__availability>. +# In some cases we add also depend on macros defined in +# <__configuration/availability.h>. # libcxx_guard An optional string field. When this field is provided, # `test_suite_guard` must also be provided. 
This field is used # only to guard the feature-test macro in . It may @@ -515,7 +516,7 @@ def add_version_header(tc): }, { "name": "__cpp_lib_format_path", - "values": {"c++23": 202403}, # P2845R8: Formatting of std::filesystem::path + "values": {"c++26": 202403}, # P2845R8: Formatting of std::filesystem::path "headers": ["filesystem"], "unimplemented": True, }, @@ -1270,7 +1271,7 @@ def add_version_header(tc): }, { "name": "__cpp_lib_to_string", - "values": {"c++23": 202306}, # P2587R3 to_string or not to_string + "values": {"c++26": 202306}, # P2587R3 to_string or not to_string "headers": ["string"], "unimplemented": True, }, @@ -1562,7 +1563,6 @@ def produce_version_header(): */ -#include <__availability> #include <__config> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/utils/libcxx/test/params.py b/libcxx/utils/libcxx/test/params.py index c2d294e49f488..4c8590a2135d9 100644 --- a/libcxx/utils/libcxx/test/params.py +++ b/libcxx/utils/libcxx/test/params.py @@ -143,6 +143,14 @@ def getSuitableClangTidy(cfg): # fmt: off DEFAULT_PARAMETERS = [ + Parameter( + name="compiler", + type=str, + help="The path of the compiler to use for testing.", + actions=lambda cxx: [ + AddSubstitution("%{cxx}", shlex.quote(cxx)), + ], + ), Parameter( name="target_triple", type=str, diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt index f7673da25d20e..86fe4a604f30d 100644 --- a/libcxxabi/CMakeLists.txt +++ b/libcxxabi/CMakeLists.txt @@ -443,6 +443,9 @@ if (NOT "${LIBCXXABI_LIBUNWIND_INCLUDES_INTERNAL}" STREQUAL "") include_directories("${LIBCXXABI_LIBUNWIND_INCLUDES_INTERNAL}") endif() +add_custom_target(cxxabi-test-depends + COMMENT "Build dependencies required to run the libc++abi test suite.") + # Add source code. This also contains all of the logic for deciding linker flags # soname, etc... 
add_subdirectory(include) diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt index c8cc93de50777..c54ced4dc3ea8 100644 --- a/libcxxabi/src/CMakeLists.txt +++ b/libcxxabi/src/CMakeLists.txt @@ -304,6 +304,7 @@ endif() # Add a meta-target for both libraries. add_custom_target(cxxabi DEPENDS ${LIBCXXABI_BUILD_TARGETS}) +add_dependencies(cxxabi-test-depends cxxabi cxx) if (LIBCXXABI_INSTALL_LIBRARY) install(TARGETS ${LIBCXXABI_INSTALL_TARGETS} diff --git a/libcxxabi/test/CMakeLists.txt b/libcxxabi/test/CMakeLists.txt index 586927189cf1d..8e3048f2ffe8a 100644 --- a/libcxxabi/test/CMakeLists.txt +++ b/libcxxabi/test/CMakeLists.txt @@ -10,20 +10,11 @@ endmacro() pythonize_bool(LIBCXXABI_USE_LLVM_UNWINDER) -if (LIBCXXABI_ENABLE_SHARED) - set(LIBCXXABI_TEST_DEPS cxxabi_shared) -else() - set(LIBCXXABI_TEST_DEPS cxxabi_static) -endif() - -list(APPEND LIBCXXABI_TEST_DEPS cxx) -if (LIBCXXABI_USE_LLVM_UNWINDER AND TARGET unwind) - list(APPEND LIBCXXABI_TEST_DEPS unwind) -endif() - set(AUTO_GEN_COMMENT "## Autogenerated by libcxxabi configuration.\n# Do not edit!") set(SERIALIZED_LIT_PARAMS "# Lit parameters serialized here for llvm-lit to pick them up\n") +serialize_lit_string_param(SERIALIZED_LIT_PARAMS compiler "${CMAKE_CXX_COMPILER}") + if (NOT LIBCXXABI_ENABLE_EXCEPTIONS) serialize_lit_param(SERIALIZED_LIT_PARAMS enable_exceptions False) endif() @@ -57,4 +48,4 @@ configure_lit_site_cfg( add_lit_testsuite(check-cxxabi "Running libcxxabi tests" ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS ${LIBCXXABI_TEST_DEPS}) + DEPENDS cxxabi-test-depends) diff --git a/libcxxabi/test/configs/cmake-bridge.cfg.in b/libcxxabi/test/configs/cmake-bridge.cfg.in index 1d0f51d37437b..3fefc6a7fdc88 100644 --- a/libcxxabi/test/configs/cmake-bridge.cfg.in +++ b/libcxxabi/test/configs/cmake-bridge.cfg.in @@ -26,7 +26,6 @@ config.test_exec_root = os.path.join('@CMAKE_BINARY_DIR@', 'test') # TODO: This is a non-standard Lit attribute and we should have another way of accessing this. 
config.host_triple = '@LLVM_HOST_TRIPLE@' -config.substitutions.append(('%{cxx}', '@CMAKE_CXX_COMPILER@')) config.substitutions.append(('%{libcxx}', '@LIBCXXABI_LIBCXX_PATH@')) config.substitutions.append(('%{include}', '@LIBCXXABI_SOURCE_DIR@/include')) config.substitutions.append(('%{cxx-include}', '@LIBCXXABI_HEADER_DIR@/include/c++/v1')) diff --git a/libunwind/test/CMakeLists.txt b/libunwind/test/CMakeLists.txt index 21dfbb0a84f0a..19f055f6f93ff 100644 --- a/libunwind/test/CMakeLists.txt +++ b/libunwind/test/CMakeLists.txt @@ -15,6 +15,8 @@ pythonize_bool(LIBUNWIND_USES_ARM_EHABI) set(AUTO_GEN_COMMENT "## Autogenerated by libunwind configuration.\n# Do not edit!") set(SERIALIZED_LIT_PARAMS "# Lit parameters serialized here for llvm-lit to pick them up\n") +serialize_lit_string_param(SERIALIZED_LIT_PARAMS compiler "${CMAKE_CXX_COMPILER}") + if (LIBUNWIND_EXECUTOR) message(DEPRECATION "LIBUNWIND_EXECUTOR is deprecated, please add executor=... to LIBUNWIND_TEST_PARAMS") serialize_lit_string_param(SERIALIZED_LIT_PARAMS executor "${LIBUNWIND_EXECUTOR}") @@ -45,4 +47,4 @@ configure_lit_site_cfg( add_lit_testsuite(check-unwind "Running libunwind tests" ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS unwind ${LIBUNWIND_TEST_DEPS}) + DEPENDS unwind) diff --git a/libunwind/test/configs/cmake-bridge.cfg.in b/libunwind/test/configs/cmake-bridge.cfg.in index c5f34c87abb92..7fc7a3da42462 100644 --- a/libunwind/test/configs/cmake-bridge.cfg.in +++ b/libunwind/test/configs/cmake-bridge.cfg.in @@ -29,7 +29,5 @@ if not @LIBUNWIND_ENABLE_THREADS@: config.available_features.add('libunwind-no-threads') # Add substitutions for bootstrapping the test suite configuration -import shlex -config.substitutions.append(('%{cxx}', shlex.quote('@CMAKE_CXX_COMPILER@'))) config.substitutions.append(('%{include}', '@LIBUNWIND_SOURCE_DIR@/include')) config.substitutions.append(('%{lib}', '@LIBUNWIND_LIBRARY_DIR@')) diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp index 9d1612beae872..6e857cfcd92f6 
100644 --- a/lld/MachO/ObjC.cpp +++ b/lld/MachO/ObjC.cpp @@ -379,12 +379,21 @@ class ObjcCategoryMerger { InfoWriteSection catPtrListInfo; }; - // Information about a pointer list in the original categories (method lists, - // protocol lists, etc) + // Information about a pointer list in the original categories or class(method + // lists, protocol lists, etc) struct PointerListInfo { + PointerListInfo() = default; + PointerListInfo(const PointerListInfo &) = default; PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct) : categoryPrefix(_categoryPrefix), pointersPerStruct(_pointersPerStruct) {} + + inline bool operator==(const PointerListInfo &cmp) const { + return pointersPerStruct == cmp.pointersPerStruct && + structSize == cmp.structSize && structCount == cmp.structCount && + allPtrs == cmp.allPtrs; + } + const char *categoryPrefix; uint32_t pointersPerStruct = 0; @@ -395,9 +404,9 @@ class ObjcCategoryMerger { std::vector allPtrs; }; - // Full information about all the categories that extend a class. This will - // include all the additional methods, protocols, and properties that are - // contained in all the categories that extend a particular class. + // Full information describing an ObjC class . This will include all the + // additional methods, protocols, and properties that are contained in the + // class and all the categories that extend a particular class. 
struct ClassExtensionInfo { ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){}; @@ -449,6 +458,9 @@ class ObjcCategoryMerger { void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset, PointerListInfo &ptrList); + PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec, + uint32_t secOffset); + void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset, PointerListInfo &ptrList); @@ -456,9 +468,9 @@ class ObjcCategoryMerger { const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList); - void emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset, - const ClassExtensionInfo &extInfo, - const PointerListInfo &ptrList); + Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset, + const ClassExtensionInfo &extInfo, + const PointerListInfo &ptrList); Defined *emitCategory(const ClassExtensionInfo &extInfo); Defined *emitCatListEntrySec(const std::string &forCategoryName, @@ -474,6 +486,10 @@ class ObjcCategoryMerger { uint32_t offset); Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec, uint32_t offset); + Defined *getClassRo(const Defined *classSym, bool getMetaRo); + void mergeCategoriesIntoBaseClass(const Defined *baseClass, + std::vector &categories); + void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset); void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec, uint32_t offset); @@ -552,6 +568,29 @@ ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec, return dyn_cast_or_null(sym); } +// Get the class's ro_data symbol. If getMetaRo is true, then we will return +// the meta-class's ro_data symbol. Otherwise, we will return the class +// (instance) ro_data symbol. 
+Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym, + bool getMetaRo) { + ConcatInputSection *isec = dyn_cast(classSym->isec()); + if (!isec) + return nullptr; + + if (!getMetaRo) + return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset + + classSym->value); + + Defined *metaClass = tryGetDefinedAtIsecOffset( + isec, classLayout.metaClassOffset + classSym->value); + if (!metaClass) + return nullptr; + + return tryGetDefinedAtIsecOffset( + dyn_cast(metaClass->isec()), + classLayout.roDataOffset); +} + // Given an ConcatInputSection or CStringInputSection and an offset, if there is // a symbol(Defined) at that offset, then erase the symbol (mark it not live) void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset( @@ -663,6 +702,15 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec, "Protocol list end offset does not match expected size"); } +// Parse a protocol list and return the PointerListInfo for it +ObjcCategoryMerger::PointerListInfo +ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec, + uint32_t secOffset) { + PointerListInfo ptrList; + parseProtocolListInfo(isec, secOffset, ptrList); + return ptrList; +} + // Parse a pointer list that might be linked to ConcatInputSection at a given // offset. This can be used for instance methods, class methods, instance props // and class props since they have the same format. @@ -769,11 +817,11 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo, // Generate a protocol list (including header) and link it into the parent at // the specified offset. 
-void ObjcCategoryMerger::emitAndLinkProtocolList( +Defined *ObjcCategoryMerger::emitAndLinkProtocolList( Defined *parentSym, uint32_t linkAtOffset, const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) { if (ptrList.allPtrs.empty()) - return; + return nullptr; assert(ptrList.allPtrs.size() == ptrList.structCount); @@ -820,6 +868,8 @@ void ObjcCategoryMerger::emitAndLinkProtocolList( infoCategoryWriter.catPtrListInfo.relocTemplate); offset += target->wordSize; } + + return ptrListSym; } // Generate a pointer list (including header) and link it into the parent at the @@ -1265,10 +1315,15 @@ void ObjcCategoryMerger::removeRefsToErasedIsecs() { void ObjcCategoryMerger::doMerge() { collectAndValidateCategoriesData(); - for (auto &entry : categoryMap) - if (entry.second.size() > 1) + for (auto &[baseClass, catInfos] : categoryMap) { + if (auto *baseClassDef = dyn_cast(baseClass)) { + // Merge all categories into the base class + mergeCategoriesIntoBaseClass(baseClassDef, catInfos); + } else if (catInfos.size() > 1) { // Merge all categories into a new, single category - mergeCategoriesIntoSingleCategory(entry.second); + mergeCategoriesIntoSingleCategory(catInfos); + } + } // Erase all categories that were merged eraseMergedCategories(); @@ -1302,3 +1357,101 @@ void objc::mergeCategories() { } void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); } + +void ObjcCategoryMerger::mergeCategoriesIntoBaseClass( + const Defined *baseClass, std::vector &categories) { + assert(categories.size() >= 1 && "Expected at least one category to merge"); + + // Collect all the info from the categories + ClassExtensionInfo extInfo(catLayout); + for (auto &catInfo : categories) { + parseCatInfoToExtInfo(catInfo, extInfo); + } + + // Get metadata for the base class + Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true); + ConcatInputSection *metaIsec = dyn_cast(metaRo->isec()); + Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false); + ConcatInputSection 
*classIsec = dyn_cast(classRo->isec()); + + // Now collect the info from the base class from the various lists in the + // class metadata + + // Protocol lists are a special case - the same protocol list is in classRo + // and metaRo, so we only need to parse it once + parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset, + extInfo.protocols); + + // Check that the classRo and metaRo protocol lists are identical + assert( + parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset) == + parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset) && + "Category merger expects classRo and metaRo to have the same protocol " + "list"); + + parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset, + extInfo.classMethods); + parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset, + extInfo.instanceMethods); + + parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset, + extInfo.classProps); + parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset, + extInfo.instanceProps); + + // Erase the old lists - these will be generated and replaced + eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset); + eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset); + eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset); + eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset); + eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset); + eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset); + + // Emit the newly merged lists - first into the meta RO then into the class RO + // First we emit and link the protocol list into the meta RO. 
Then we link it + // in the classRo as well (they're supposed to be identical) + if (Defined *protoListSym = + emitAndLinkProtocolList(metaRo, roClassLayout.baseProtocolsOffset, + extInfo, extInfo.protocols)) { + createSymbolReference(classRo, protoListSym, + roClassLayout.baseProtocolsOffset, + infoCategoryWriter.catBodyInfo.relocTemplate); + } + + emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo, + extInfo.classMethods); + emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo, + extInfo.instanceMethods); + + emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo, + extInfo.classProps); + + emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo, + extInfo.instanceProps); + + // Mark all the categories as merged - this will be used to erase them later + for (auto &catInfo : categories) + catInfo.wasMerged = true; +} + +// Erase the symbol at a given offset in an InputSection +void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec, + uint32_t offset) { + Defined *sym = tryGetDefinedAtIsecOffset(isec, offset); + if (!sym) + return; + + // Remove the symbol from isec->symbols + assert(isa(sym) && "Can only erase a Defined"); + llvm::erase(isec->symbols, sym); + + // Remove the relocs that refer to this symbol + auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; }; + llvm::erase_if(isec->relocs, removeAtOff); + + // Now, if the symbol fully occupies a ConcatInputSection, we can also erase + // the whole ConcatInputSection + if (ConcatInputSection *cisec = dyn_cast(sym->isec())) + if (cisec->data.size() == sym->size) + eraseISec(cisec); +} diff --git a/lld/test/MachO/objc-category-merging-complete-test.s b/lld/test/MachO/objc-category-merging-complete-test.s index 74400177b550d..cf3e19e2f9c8b 100644 --- a/lld/test/MachO/objc-category-merging-complete-test.s +++ b/lld/test/MachO/objc-category-merging-complete-test.s @@ -1,6 +1,7 @@ # REQUIRES: 
aarch64 # RUN: rm -rf %t; split-file %s %t && cd %t +############ Test merging multiple categories into a single category ############ ## Create a dylib to link against(a64_file1.dylib) and merge categories in the main binary (file2_merge_a64.exe) # RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o a64_file1.o a64_file1.s # RUN: %lld -arch arm64 a64_file1.o -o a64_file1.dylib -dylib @@ -12,6 +13,10 @@ # RUN: llvm-objdump --objc-meta-data --macho a64_file2_no_merge.exe | FileCheck %s --check-prefixes=NO_MERGE_CATS # RUN: llvm-objdump --objc-meta-data --macho a64_file2_merge.exe | FileCheck %s --check-prefixes=MERGE_CATS +############ Test merging multiple categories into the base class ############ +# RUN: %lld -arch arm64 -o a64_file2_merge_into_class.exe -objc_category_merging a64_file1.o a64_file2.o +# RUN: llvm-objdump --objc-meta-data --macho a64_file2_merge_into_class.exe | FileCheck %s --check-prefixes=MERGE_CATS_CLS + MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass(Category02|Category03) MERGE_CATS-NEXT: name {{.*}} Category02|Category03 @@ -101,6 +106,211 @@ NO_MERGE_CATS-NEXT: 24 NO_MERGE_CATS-NEXT: 2 +MERGE_CATS_CLS: _OBJC_CLASS_$_MyBaseClass +MERGE_CATS_CLS-NEXT: isa {{.*}} _OBJC_METACLASS_$_MyBaseClass +MERGE_CATS_CLS-NEXT: superclass 0x0 +MERGE_CATS_CLS-NEXT: cache {{.*}} __objc_empty_cache +MERGE_CATS_CLS-NEXT: vtable 0x0 +MERGE_CATS_CLS-NEXT: data {{.*}} (struct class_ro_t *) +MERGE_CATS_CLS-NEXT: flags 0x2 RO_ROOT +MERGE_CATS_CLS-NEXT: instanceStart 0 +MERGE_CATS_CLS-NEXT: instanceSize 4 +MERGE_CATS_CLS-NEXT: reserved 0x0 +MERGE_CATS_CLS-NEXT: ivarLayout 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyBaseClass +MERGE_CATS_CLS-NEXT: baseMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 8 +MERGE_CATS_CLS-NEXT: name {{.*}} class02InstanceMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass(Category02) class02InstanceMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} 
myProtocol02Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass(Category02) myProtocol02Method] +MERGE_CATS_CLS-NEXT: name {{.*}} class03InstanceMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass(Category03) class03InstanceMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol03Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass(Category03) myProtocol03Method] +MERGE_CATS_CLS-NEXT: name {{.*}} baseInstanceMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass baseInstanceMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol01Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass myProtocol01Method] +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass MyProtocol01Prop] +MERGE_CATS_CLS-NEXT: name {{.*}} setMyProtocol01Prop: +MERGE_CATS_CLS-NEXT: types {{.*}} v20@0:8i16 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass setMyProtocol01Prop:] +MERGE_CATS_CLS-NEXT: baseProtocols {{.*}} +MERGE_CATS_CLS-NEXT: count 3 +MERGE_CATS_CLS-NEXT: list[0] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 2 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol02Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: list[1] {{.*}} (struct protocol_t *) 
+MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 2 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol03Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: list[2] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 3 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol01Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} setMyProtocol01Prop: +MERGE_CATS_CLS-NEXT: types {{.*}} v20@0:8i16 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: ivars {{.*}} +MERGE_CATS_CLS-NEXT: entsize 32 +MERGE_CATS_CLS-NEXT: count 1 +MERGE_CATS_CLS-NEXT: offset {{.*}} 0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01Prop +MERGE_CATS_CLS-NEXT: type {{.*}} i +MERGE_CATS_CLS-NEXT: alignment 2 +MERGE_CATS_CLS-NEXT: size 4 +MERGE_CATS_CLS-NEXT: weakIvarLayout 0x0 +MERGE_CATS_CLS-NEXT: baseProperties {{.*}} +MERGE_CATS_CLS-NEXT: 
entsize 16 +MERGE_CATS_CLS-NEXT: count 3 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02Prop +MERGE_CATS_CLS-NEXT: attributes {{.*}} Ti,R,D +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03Prop +MERGE_CATS_CLS-NEXT: attributes {{.*}} Ti,R,D +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01Prop +MERGE_CATS_CLS-NEXT: attributes {{.*}} Ti,N,VMyProtocol01Prop +MERGE_CATS_CLS-NEXT: Meta Class +MERGE_CATS_CLS-NEXT: isa {{.*}} _OBJC_METACLASS_$_MyBaseClass +MERGE_CATS_CLS-NEXT: superclass {{.*}} _OBJC_CLASS_$_MyBaseClass +MERGE_CATS_CLS-NEXT: cache {{.*}} __objc_empty_cache +MERGE_CATS_CLS-NEXT: vtable 0x0 +MERGE_CATS_CLS-NEXT: data {{.*}} (struct class_ro_t *) +MERGE_CATS_CLS-NEXT: flags 0x3 RO_META RO_ROOT +MERGE_CATS_CLS-NEXT: instanceStart 40 +MERGE_CATS_CLS-NEXT: instanceSize 40 +MERGE_CATS_CLS-NEXT: reserved 0x0 +MERGE_CATS_CLS-NEXT: ivarLayout 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyBaseClass +MERGE_CATS_CLS-NEXT: baseMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 5 +MERGE_CATS_CLS-NEXT: name {{.*}} class02ClassMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass(Category02) class02ClassMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass(Category02) MyProtocol02Prop] +MERGE_CATS_CLS-NEXT: name {{.*}} class03ClassMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass(Category03) class03ClassMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass(Category03) MyProtocol03Prop] +MERGE_CATS_CLS-NEXT: name {{.*}} baseClassMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass baseClassMethod] +MERGE_CATS_CLS-NEXT: baseProtocols {{.*}} +MERGE_CATS_CLS-NEXT: count 3 +MERGE_CATS_CLS-NEXT: list[0] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 
0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 2 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol02Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: list[1] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 2 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol03Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: list[2] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 3 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol01Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 
+MERGE_CATS_CLS-NEXT: name {{.*}} setMyProtocol01Prop: +MERGE_CATS_CLS-NEXT: types {{.*}} v20@0:8i16 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: ivars 0x0 +MERGE_CATS_CLS-NEXT: weakIvarLayout 0x0 +MERGE_CATS_CLS-NEXT: baseProperties 0x0 +MERGE_CATS_CLS: __OBJC_$_CATEGORY_MyBaseClass_$_Category04 + + #--- a64_file1.s ## @protocol MyProtocol01 diff --git a/lld/test/MachO/objc-category-merging-extern-class-minimal.s b/lld/test/MachO/objc-category-merging-minimal.s similarity index 59% rename from lld/test/MachO/objc-category-merging-extern-class-minimal.s rename to lld/test/MachO/objc-category-merging-minimal.s index 5dd8924df5ad6..fcd90f178b150 100644 --- a/lld/test/MachO/objc-category-merging-extern-class-minimal.s +++ b/lld/test/MachO/objc-category-merging-minimal.s @@ -1,7 +1,8 @@ # REQUIRES: aarch64 # RUN: rm -rf %t; split-file %s %t && cd %t -## Create a dylib with a fake base class to link against +############ Test merging multiple categories into a single category ############ +## Create a dylib with a fake base class to link against when merging between categories # RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o a64_fakedylib.o a64_fakedylib.s # RUN: %lld -arch arm64 a64_fakedylib.o -o a64_fakedylib.dylib -dylib @@ -14,6 +15,15 @@ # RUN: llvm-objdump --objc-meta-data --macho merge_cat_minimal_no_merge.dylib | FileCheck %s --check-prefixes=NO_MERGE_CATS # RUN: llvm-objdump --objc-meta-data --macho merge_cat_minimal_merge.dylib | FileCheck %s --check-prefixes=MERGE_CATS +############ Test merging multiple categories into the base class ############ +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o merge_base_class_minimal.o merge_base_class_minimal.s +# RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_yes_merge.dylib
-objc_category_merging merge_base_class_minimal.o merge_cat_minimal.o +# RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_no_merge.dylib merge_base_class_minimal.o merge_cat_minimal.o + +# RUN: llvm-objdump --objc-meta-data --macho merge_base_class_minimal_no_merge.dylib | FileCheck %s --check-prefixes=NO_MERGE_INTO_BASE +# RUN: llvm-objdump --objc-meta-data --macho merge_base_class_minimal_yes_merge.dylib | FileCheck %s --check-prefixes=YES_MERGE_INTO_BASE + + #### Check merge categories enabled ### # Check that the original categories are not there MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category01 @@ -44,6 +54,28 @@ NO_MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass_$_Category01 NO_MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass_$_Category02 +#### Check merge categories into base class is disabled #### +NO_MERGE_INTO_BASE: __OBJC_$_CATEGORY_MyBaseClass_$_Category01 +NO_MERGE_INTO_BASE: __OBJC_$_CATEGORY_MyBaseClass_$_Category02 + +#### Check merge categories into base class is enabled and categories are merged into base class #### +YES_MERGE_INTO_BASE-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category01 +YES_MERGE_INTO_BASE-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category02 + +YES_MERGE_INTO_BASE: _OBJC_CLASS_$_MyBaseClass +YES_MERGE_INTO_BASE-NEXT: _OBJC_METACLASS_$_MyBaseClass +YES_MERGE_INTO_BASE: baseMethods +YES_MERGE_INTO_BASE-NEXT: entsize 24 +YES_MERGE_INTO_BASE-NEXT: count 3 +YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat01_InstanceMethod +YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8 +YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass(Category01) cat01_InstanceMethod] +YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat02_InstanceMethod +YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8 +YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass(Category02) cat02_InstanceMethod] +YES_MERGE_INTO_BASE-NEXT: name {{.*}} baseInstanceMethod +YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8 +YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass baseInstanceMethod] #--- a64_fakedylib.s @@ -156,3 +188,94 @@
L_OBJC_IMAGE_INFO: .addrsig .addrsig_sym __OBJC_$_CATEGORY_MyBaseClass_$_Category01 + +#--- merge_base_class_minimal.s +; clang -c merge_base_class_minimal.mm -O3 -target arm64-apple-macos -arch arm64 -S -o merge_base_class_minimal.s +; ================== Generated from ObjC: ================== +; __attribute__((objc_root_class)) +; @interface MyBaseClass +; - (void)baseInstanceMethod; +; @end +; +; @implementation MyBaseClass +; - (void)baseInstanceMethod {} +; @end +; ================== Generated from ObjC ================== + .section __TEXT,__text,regular,pure_instructions + .build_version macos, 11, 0 + .p2align 2 +"-[MyBaseClass baseInstanceMethod]": + .cfi_startproc +; %bb.0: + ret + .cfi_endproc + .section __DATA,__objc_data + .globl _OBJC_CLASS_$_MyBaseClass + .p2align 3, 0x0 +_OBJC_CLASS_$_MyBaseClass: + .quad _OBJC_METACLASS_$_MyBaseClass + .quad 0 + .quad 0 + .quad 0 + .quad __OBJC_CLASS_RO_$_MyBaseClass + .globl _OBJC_METACLASS_$_MyBaseClass + .p2align 3, 0x0 +_OBJC_METACLASS_$_MyBaseClass: + .quad _OBJC_METACLASS_$_MyBaseClass + .quad _OBJC_CLASS_$_MyBaseClass + .quad 0 + .quad 0 + .quad __OBJC_METACLASS_RO_$_MyBaseClass + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_: + .asciz "MyBaseClass" + .section __DATA,__objc_const + .p2align 3, 0x0 +__OBJC_METACLASS_RO_$_MyBaseClass: + .long 3 + .long 40 + .long 40 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_: + .asciz "baseInstanceMethod" + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_: + .asciz "v16@0:8" + .section __DATA,__objc_const + .p2align 3, 0x0 +__OBJC_$_INSTANCE_METHODS_MyBaseClass: + .long 24 + .long 1 + .quad l_OBJC_METH_VAR_NAME_ + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyBaseClass baseInstanceMethod]" + .p2align 3, 0x0 +__OBJC_CLASS_RO_$_MyBaseClass: + .long 2 + .long 0 + .long 0 + .space 4 + .quad 0 + .quad 
l_OBJC_CLASS_NAME_ + .quad __OBJC_$_INSTANCE_METHODS_MyBaseClass + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .section __DATA,__objc_classlist,regular,no_dead_strip + .p2align 3, 0x0 +l_OBJC_LABEL_CLASS_$: + .quad _OBJC_CLASS_$_MyBaseClass + .section __DATA,__objc_imageinfo,regular,no_dead_strip +L_OBJC_IMAGE_INFO: + .long 0 + .long 64 +.subsections_via_symbols diff --git a/lld/wasm/WriterUtils.cpp b/lld/wasm/WriterUtils.cpp index cdd2c42f939ef..c6a1592012e64 100644 --- a/lld/wasm/WriterUtils.cpp +++ b/lld/wasm/WriterUtils.cpp @@ -35,6 +35,8 @@ std::string toString(ValType type) { return "funcref"; case ValType::EXTERNREF: return "externref"; + case ValType::EXNREF: + return "exnref"; case ValType::OTHERREF: return "otherref"; } diff --git a/lldb/bindings/headers.swig b/lldb/bindings/headers.swig index ffdc3c31ec883..c91504604b6ac 100644 --- a/lldb/bindings/headers.swig +++ b/lldb/bindings/headers.swig @@ -8,6 +8,8 @@ %{ #include "lldb/lldb-public.h" #include "lldb/API/SBAddress.h" +#include "lldb/API/SBAddressRange.h" +#include "lldb/API/SBAddressRangeList.h" #include "lldb/API/SBAttachInfo.h" #include "lldb/API/SBBlock.h" #include "lldb/API/SBBreakpoint.h" diff --git a/lldb/bindings/interface/SBAddressRangeDocstrings.i b/lldb/bindings/interface/SBAddressRangeDocstrings.i new file mode 100644 index 0000000000000..650195704d73e --- /dev/null +++ b/lldb/bindings/interface/SBAddressRangeDocstrings.i @@ -0,0 +1,3 @@ +%feature("docstring", +"API clients can get address range information." 
+) lldb::SBAddressRange; diff --git a/lldb/bindings/interface/SBAddressRangeExtensions.i b/lldb/bindings/interface/SBAddressRangeExtensions.i new file mode 100644 index 0000000000000..31bcfcb64590b --- /dev/null +++ b/lldb/bindings/interface/SBAddressRangeExtensions.i @@ -0,0 +1,11 @@ +%extend lldb::SBAddressRange { +#ifdef SWIGPYTHON + %pythoncode%{ + def __repr__(self): + import lldb + stream = lldb.SBStream() + self.GetDescription(stream, lldb.target if lldb.target else lldb.SBTarget()) + return stream.GetData() + %} +#endif +} diff --git a/lldb/bindings/interface/SBAddressRangeListDocstrings.i b/lldb/bindings/interface/SBAddressRangeListDocstrings.i new file mode 100644 index 0000000000000..e4b96b9ca5931 --- /dev/null +++ b/lldb/bindings/interface/SBAddressRangeListDocstrings.i @@ -0,0 +1,3 @@ +%feature("docstring", +"Represents a list of :py:class:`SBAddressRange`." +) lldb::SBAddressRangeList; diff --git a/lldb/bindings/interface/SBAddressRangeListExtensions.i b/lldb/bindings/interface/SBAddressRangeListExtensions.i new file mode 100644 index 0000000000000..e281a84d73d27 --- /dev/null +++ b/lldb/bindings/interface/SBAddressRangeListExtensions.i @@ -0,0 +1,29 @@ +%extend lldb::SBAddressRangeList { +#ifdef SWIGPYTHON + %pythoncode%{ + def __len__(self): + '''Return the number of address ranges in a lldb.SBAddressRangeList object.''' + return self.GetSize() + + def __iter__(self): + '''Iterate over all the address ranges in a lldb.SBAddressRangeList object.''' + return lldb_iter(self, 'GetSize', 'GetAddressRangeAtIndex') + + def __getitem__(self, idx): + '''Get the address range at a given index in an lldb.SBAddressRangeList object.''' + if not isinstance(idx, int): + raise TypeError("unsupported index type: %s" % type(idx)) + count = len(self) + if not (-count <= idx < count): + raise IndexError("list index out of range") + idx %= count + return self.GetAddressRangeAtIndex(idx) + + def __repr__(self): + import lldb + stream = lldb.SBStream() + 
self.GetDescription(stream, lldb.target if lldb.target else lldb.SBTarget()) + return stream.GetData() + %} +#endif +} diff --git a/lldb/bindings/interfaces.swig b/lldb/bindings/interfaces.swig index 2a29a8dd7ef0b..0953f4c72a910 100644 --- a/lldb/bindings/interfaces.swig +++ b/lldb/bindings/interfaces.swig @@ -12,6 +12,8 @@ /* Docstrings for SB classes and methods */ %include "./interface/SBAddressDocstrings.i" +%include "./interface/SBAddressRangeDocstrings.i" +%include "./interface/SBAddressRangeListDocstrings.i" %include "./interface/SBAttachInfoDocstrings.i" %include "./interface/SBBlockDocstrings.i" %include "./interface/SBBreakpointDocstrings.i" @@ -86,6 +88,8 @@ /* API headers */ %include "lldb/API/SBAddress.h" +%include "lldb/API/SBAddressRange.h" +%include "lldb/API/SBAddressRangeList.h" %include "lldb/API/SBAttachInfo.h" %include "lldb/API/SBBlock.h" %include "lldb/API/SBBreakpoint.h" @@ -163,6 +167,8 @@ /* Extensions for SB classes */ %include "./interface/SBAddressExtensions.i" +%include "./interface/SBAddressRangeExtensions.i" +%include "./interface/SBAddressRangeListExtensions.i" %include "./interface/SBBlockExtensions.i" %include "./interface/SBBreakpointExtensions.i" %include "./interface/SBBreakpointListExtensions.i" diff --git a/lldb/include/lldb/API/LLDB.h b/lldb/include/lldb/API/LLDB.h index b256544326a22..d8cc9f5067fe9 100644 --- a/lldb/include/lldb/API/LLDB.h +++ b/lldb/include/lldb/API/LLDB.h @@ -10,6 +10,8 @@ #define LLDB_API_LLDB_H #include "lldb/API/SBAddress.h" +#include "lldb/API/SBAddressRange.h" +#include "lldb/API/SBAddressRangeList.h" #include "lldb/API/SBAttachInfo.h" #include "lldb/API/SBBlock.h" #include "lldb/API/SBBreakpoint.h" diff --git a/lldb/include/lldb/API/SBAddress.h b/lldb/include/lldb/API/SBAddress.h index 5e5f355ccc390..430dad4862dbf 100644 --- a/lldb/include/lldb/API/SBAddress.h +++ b/lldb/include/lldb/API/SBAddress.h @@ -86,6 +86,7 @@ class LLDB_API SBAddress { lldb::SBLineEntry GetLineEntry(); protected: + friend 
class SBAddressRange; friend class SBBlock; friend class SBBreakpoint; friend class SBBreakpointLocation; diff --git a/lldb/include/lldb/API/SBAddressRange.h b/lldb/include/lldb/API/SBAddressRange.h new file mode 100644 index 0000000000000..152bd82426af1 --- /dev/null +++ b/lldb/include/lldb/API/SBAddressRange.h @@ -0,0 +1,66 @@ +//===-- SBAddressRange.h ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_API_SBADDRESSRANGE_H +#define LLDB_API_SBADDRESSRANGE_H + +#include "lldb/API/SBDefines.h" + +namespace lldb { + +class LLDB_API SBAddressRange { +public: + SBAddressRange(); + + SBAddressRange(const lldb::SBAddressRange &rhs); + + SBAddressRange(lldb::SBAddress addr, lldb::addr_t byte_size); + + ~SBAddressRange(); + + const lldb::SBAddressRange &operator=(const lldb::SBAddressRange &rhs); + + void Clear(); + + /// Check the address range refers to a valid base address and has a byte + /// size greater than zero. + /// + /// \return + /// True if the address range is valid, false otherwise. + bool IsValid() const; + + /// Get the base address of the range. + /// + /// \return + /// Base address object. + lldb::SBAddress GetBaseAddress() const; + + /// Get the byte size of this range. + /// + /// \return + /// The size in bytes of this address range. 
+ lldb::addr_t GetByteSize() const; + + bool operator==(const SBAddressRange &rhs); + + bool operator!=(const SBAddressRange &rhs); + + bool GetDescription(lldb::SBStream &description, const SBTarget target); + +private: + friend class SBAddressRangeList; + friend class SBBlock; + friend class SBFunction; + friend class SBProcess; + + AddressRangeUP m_opaque_up; +}; + +} // namespace lldb + +#endif // LLDB_API_SBADDRESSRANGE_H diff --git a/lldb/include/lldb/API/SBAddressRangeList.h b/lldb/include/lldb/API/SBAddressRangeList.h new file mode 100644 index 0000000000000..a123287ef1b4f --- /dev/null +++ b/lldb/include/lldb/API/SBAddressRangeList.h @@ -0,0 +1,54 @@ +//===-- SBAddressRangeList.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_API_SBADDRESSRANGELIST_H +#define LLDB_API_SBADDRESSRANGELIST_H + +#include + +#include "lldb/API/SBDefines.h" + +namespace lldb_private { +class AddressRangeListImpl; +} + +namespace lldb { + +class LLDB_API SBAddressRangeList { +public: + SBAddressRangeList(); + + SBAddressRangeList(const lldb::SBAddressRangeList &rhs); + + ~SBAddressRangeList(); + + const lldb::SBAddressRangeList & + operator=(const lldb::SBAddressRangeList &rhs); + + uint32_t GetSize() const; + + void Clear(); + + SBAddressRange GetAddressRangeAtIndex(uint64_t idx); + + void Append(const lldb::SBAddressRange &addr_range); + + void Append(const lldb::SBAddressRangeList &addr_range_list); + + bool GetDescription(lldb::SBStream &description, const SBTarget &target); + +private: + friend class SBBlock; + friend class SBProcess; + + std::unique_ptr m_opaque_up; +}; + +} // namespace lldb + +#endif // LLDB_API_SBADDRESSRANGELIST_H diff --git 
a/lldb/include/lldb/API/SBBlock.h b/lldb/include/lldb/API/SBBlock.h index 2570099f7652f..de4bb22be2692 100644 --- a/lldb/include/lldb/API/SBBlock.h +++ b/lldb/include/lldb/API/SBBlock.h @@ -9,6 +9,8 @@ #ifndef LLDB_API_SBBLOCK_H #define LLDB_API_SBBLOCK_H +#include "lldb/API/SBAddressRange.h" +#include "lldb/API/SBAddressRangeList.h" #include "lldb/API/SBDefines.h" #include "lldb/API/SBFrame.h" #include "lldb/API/SBTarget.h" @@ -52,6 +54,8 @@ class LLDB_API SBBlock { lldb::SBAddress GetRangeEndAddress(uint32_t idx); + lldb::SBAddressRangeList GetRanges(); + uint32_t GetRangeIndexForBlockAddress(lldb::SBAddress block_addr); lldb::SBValueList GetVariables(lldb::SBFrame &frame, bool arguments, diff --git a/lldb/include/lldb/API/SBDefines.h b/lldb/include/lldb/API/SBDefines.h index 1181920677b46..87c0a1c3661ca 100644 --- a/lldb/include/lldb/API/SBDefines.h +++ b/lldb/include/lldb/API/SBDefines.h @@ -43,6 +43,8 @@ namespace lldb { class LLDB_API SBAddress; +class LLDB_API SBAddressRange; +class LLDB_API SBAddressRangeList; class LLDB_API SBAttachInfo; class LLDB_API SBBlock; class LLDB_API SBBreakpoint; diff --git a/lldb/include/lldb/API/SBFunction.h b/lldb/include/lldb/API/SBFunction.h index 71b372a818e4b..df607fdc7ebf5 100644 --- a/lldb/include/lldb/API/SBFunction.h +++ b/lldb/include/lldb/API/SBFunction.h @@ -10,6 +10,7 @@ #define LLDB_API_SBFUNCTION_H #include "lldb/API/SBAddress.h" +#include "lldb/API/SBAddressRangeList.h" #include "lldb/API/SBDefines.h" #include "lldb/API/SBInstructionList.h" @@ -44,6 +45,8 @@ class LLDB_API SBFunction { lldb::SBAddress GetEndAddress(); + lldb::SBAddressRangeList GetRanges(); + const char *GetArgumentName(uint32_t arg_idx); uint32_t GetPrologueByteSize(); diff --git a/lldb/include/lldb/API/SBStream.h b/lldb/include/lldb/API/SBStream.h index 0e33f05b69916..71caf41fd7549 100644 --- a/lldb/include/lldb/API/SBStream.h +++ b/lldb/include/lldb/API/SBStream.h @@ -62,6 +62,8 @@ class LLDB_API SBStream { protected: friend class SBAddress; 
+ friend class SBAddressRange; + friend class SBAddressRangeList; friend class SBBlock; friend class SBBreakpoint; friend class SBBreakpointLocation; diff --git a/lldb/include/lldb/API/SBTarget.h b/lldb/include/lldb/API/SBTarget.h index feeaa1cb71132..35c2ed9c20a23 100644 --- a/lldb/include/lldb/API/SBTarget.h +++ b/lldb/include/lldb/API/SBTarget.h @@ -943,6 +943,7 @@ class LLDB_API SBTarget { protected: friend class SBAddress; + friend class SBAddressRange; friend class SBBlock; friend class SBBreakpoint; friend class SBBreakpointList; diff --git a/lldb/include/lldb/Core/AddressRange.h b/lldb/include/lldb/Core/AddressRange.h index 4a33c2d795876..68a3ad0edd2d7 100644 --- a/lldb/include/lldb/Core/AddressRange.h +++ b/lldb/include/lldb/Core/AddressRange.h @@ -86,6 +86,8 @@ class AddressRange { /// (LLDB_INVALID_ADDRESS) and a zero byte size. void Clear(); + bool IsValid() const; + /// Check if a section offset address is contained in this range. /// /// \param[in] so_addr @@ -236,12 +238,24 @@ class AddressRange { /// The new size in bytes of this address range. void SetByteSize(lldb::addr_t byte_size) { m_byte_size = byte_size; } + bool GetDescription(Stream *s, Target *target) const; + + bool operator==(const AddressRange &rhs); + + bool operator!=(const AddressRange &rhs); + protected: // Member variables Address m_base_addr; ///< The section offset base address of this range. lldb::addr_t m_byte_size = 0; ///< The size in bytes of this address range. }; +// Forward-declarable wrapper. 
+class AddressRanges : public std::vector { +public: + using std::vector::vector; +}; + } // namespace lldb_private #endif // LLDB_CORE_ADDRESSRANGE_H diff --git a/lldb/include/lldb/Core/AddressRangeListImpl.h b/lldb/include/lldb/Core/AddressRangeListImpl.h new file mode 100644 index 0000000000000..46ebfe73d4d92 --- /dev/null +++ b/lldb/include/lldb/Core/AddressRangeListImpl.h @@ -0,0 +1,51 @@ +//===-- AddressRangeListImpl.h ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_CORE_ADDRESSRANGELISTIMPL_H +#define LLDB_CORE_ADDRESSRANGELISTIMPL_H + +#include "lldb/Core/AddressRange.h" +#include + +namespace lldb { +class SBBlock; +} + +namespace lldb_private { + +class AddressRangeListImpl { +public: + AddressRangeListImpl(); + + AddressRangeListImpl(const AddressRangeListImpl &rhs) = default; + + AddressRangeListImpl &operator=(const AddressRangeListImpl &rhs); + + size_t GetSize() const; + + void Reserve(size_t capacity); + + void Append(const AddressRange &sb_region); + + void Append(const AddressRangeListImpl &list); + + void Clear(); + + lldb_private::AddressRange GetAddressRangeAtIndex(size_t index); + +private: + friend class lldb::SBBlock; + + AddressRanges &ref(); + + AddressRanges m_ranges; +}; + +} // namespace lldb_private + +#endif // LLDB_CORE_ADDRESSRANGELISTIMPL_H diff --git a/lldb/include/lldb/Symbol/Block.h b/lldb/include/lldb/Symbol/Block.h index 02fd2add53103..c9c4d5ad767d7 100644 --- a/lldb/include/lldb/Symbol/Block.h +++ b/lldb/include/lldb/Symbol/Block.h @@ -355,6 +355,8 @@ class Block : public UserID, public SymbolContextScope { // be able to get at any of the address ranges in a block. 
bool GetRangeAtIndex(uint32_t range_idx, AddressRange &range); + AddressRanges GetRanges(); + bool GetStartAddress(Address &addr); void SetDidParseVariables(bool b, bool set_children); diff --git a/lldb/include/lldb/Symbol/Type.h b/lldb/include/lldb/Symbol/Type.h index 7aa0852676e46..c6f30cde81867 100644 --- a/lldb/include/lldb/Symbol/Type.h +++ b/lldb/include/lldb/Symbol/Type.h @@ -62,6 +62,8 @@ struct CompilerContext { CompilerContextKind kind; ConstString name; }; +llvm::raw_ostream &operator<<(llvm::raw_ostream &os, + const CompilerContext &rhs); /// Match \p context_chain against \p pattern, which may contain "Any" /// kinds. The \p context_chain should *not* contain any "Any" kinds. diff --git a/lldb/include/lldb/Target/RegisterFlags.h b/lldb/include/lldb/Target/RegisterFlags.h index 9b343e445678a..29a47540cd4f5 100644 --- a/lldb/include/lldb/Target/RegisterFlags.h +++ b/lldb/include/lldb/Target/RegisterFlags.h @@ -15,7 +15,7 @@ namespace lldb_private { -class StreamString; +class Stream; class Log; class RegisterFlags { @@ -56,7 +56,7 @@ class RegisterFlags { /// Output XML that describes this field, to be inserted into a target XML /// file. Reserved characters in field names like "<" are replaced with /// their XML safe equivalents like ">". - void ToXML(StreamString &strm) const; + void ToXML(Stream &strm) const; bool operator<(const Field &rhs) const { return GetStart() < rhs.GetStart(); @@ -119,7 +119,7 @@ class RegisterFlags { std::string AsTable(uint32_t max_width) const; // Output XML that describes this set of flags. 
- void ToXML(StreamString &strm) const; + void ToXML(Stream &strm) const; private: const std::string m_id; diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h index 10ba921b9dac8..6d880b4da03c9 100644 --- a/lldb/include/lldb/lldb-forward.h +++ b/lldb/include/lldb/lldb-forward.h @@ -19,6 +19,8 @@ class ASTResultSynthesizer; class ASTStructExtractor; class Address; class AddressRange; +class AddressRanges; +class AddressRangeList; class AddressResolver; class ArchSpec; class Architecture; @@ -308,6 +310,7 @@ template class StreamBuffer; namespace lldb { typedef std::shared_ptr ABISP; +typedef std::unique_ptr AddressRangeUP; typedef std::shared_ptr BatonSP; typedef std::shared_ptr BlockSP; typedef std::shared_ptr BreakpointSP; diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt index e8228afe103f9..6397101609315 100644 --- a/lldb/source/API/CMakeLists.txt +++ b/lldb/source/API/CMakeLists.txt @@ -42,6 +42,8 @@ set_target_properties(lldb-sbapi-dwarf-enums PROPERTIES FOLDER "LLDB/Tablegennin add_lldb_library(liblldb SHARED ${option_framework} SBAddress.cpp + SBAddressRange.cpp + SBAddressRangeList.cpp SBAttachInfo.cpp SBBlock.cpp SBBreakpoint.cpp diff --git a/lldb/source/API/SBAddressRange.cpp b/lldb/source/API/SBAddressRange.cpp new file mode 100644 index 0000000000000..9b1affdade439 --- /dev/null +++ b/lldb/source/API/SBAddressRange.cpp @@ -0,0 +1,103 @@ +//===-- SBAddressRange.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/API/SBAddressRange.h" +#include "Utils.h" +#include "lldb/API/SBAddress.h" +#include "lldb/API/SBStream.h" +#include "lldb/API/SBTarget.h" +#include "lldb/Core/AddressRange.h" +#include "lldb/Core/Section.h" +#include "lldb/Utility/Instrumentation.h" +#include "lldb/Utility/Stream.h" +#include +#include + +using namespace lldb; +using namespace lldb_private; + +SBAddressRange::SBAddressRange() + : m_opaque_up(std::make_unique()) { + LLDB_INSTRUMENT_VA(this); +} + +SBAddressRange::SBAddressRange(const SBAddressRange &rhs) { + LLDB_INSTRUMENT_VA(this, rhs); + + m_opaque_up = clone(rhs.m_opaque_up); +} + +SBAddressRange::SBAddressRange(lldb::SBAddress addr, lldb::addr_t byte_size) + : m_opaque_up(std::make_unique(addr.ref(), byte_size)) { + LLDB_INSTRUMENT_VA(this, addr, byte_size); +} + +SBAddressRange::~SBAddressRange() = default; + +const SBAddressRange &SBAddressRange::operator=(const SBAddressRange &rhs) { + LLDB_INSTRUMENT_VA(this, rhs); + + if (this != &rhs) + m_opaque_up = clone(rhs.m_opaque_up); + return *this; +} + +bool SBAddressRange::operator==(const SBAddressRange &rhs) { + LLDB_INSTRUMENT_VA(this, rhs); + + if (!IsValid() || !rhs.IsValid()) + return false; + return m_opaque_up->operator==(*(rhs.m_opaque_up)); +} + +bool SBAddressRange::operator!=(const SBAddressRange &rhs) { + LLDB_INSTRUMENT_VA(this, rhs); + + return !(*this == rhs); +} + +void SBAddressRange::Clear() { + LLDB_INSTRUMENT_VA(this); + + m_opaque_up.reset(); +} + +bool SBAddressRange::IsValid() const { + LLDB_INSTRUMENT_VA(this); + + return m_opaque_up && m_opaque_up->IsValid(); +} + +lldb::SBAddress SBAddressRange::GetBaseAddress() const { + LLDB_INSTRUMENT_VA(this); + + if (!IsValid()) + return lldb::SBAddress(); + return lldb::SBAddress(m_opaque_up->GetBaseAddress()); +} + +lldb::addr_t SBAddressRange::GetByteSize() const { 
+ LLDB_INSTRUMENT_VA(this); + + if (!IsValid()) + return 0; + return m_opaque_up->GetByteSize(); +} + +bool SBAddressRange::GetDescription(SBStream &description, + const SBTarget target) { + LLDB_INSTRUMENT_VA(this, description, target); + + Stream &stream = description.ref(); + if (!IsValid()) { + stream << ""; + return true; + } + m_opaque_up->GetDescription(&stream, target.GetSP().get()); + return true; +} diff --git a/lldb/source/API/SBAddressRangeList.cpp b/lldb/source/API/SBAddressRangeList.cpp new file mode 100644 index 0000000000000..20660b3ff2088 --- /dev/null +++ b/lldb/source/API/SBAddressRangeList.cpp @@ -0,0 +1,94 @@ +//===-- SBAddressRangeList.cpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/API/SBAddressRangeList.h" +#include "Utils.h" +#include "lldb/API/SBAddressRange.h" +#include "lldb/API/SBStream.h" +#include "lldb/API/SBTarget.h" +#include "lldb/Core/AddressRangeListImpl.h" +#include "lldb/Utility/Instrumentation.h" +#include "lldb/Utility/Stream.h" + +#include + +using namespace lldb; +using namespace lldb_private; + +SBAddressRangeList::SBAddressRangeList() + : m_opaque_up(std::make_unique()) { + LLDB_INSTRUMENT_VA(this); +} + +SBAddressRangeList::SBAddressRangeList(const SBAddressRangeList &rhs) + : m_opaque_up(std::make_unique(*rhs.m_opaque_up)) { + LLDB_INSTRUMENT_VA(this, rhs); +} + +SBAddressRangeList::~SBAddressRangeList() = default; + +const SBAddressRangeList & +SBAddressRangeList::operator=(const SBAddressRangeList &rhs) { + LLDB_INSTRUMENT_VA(this, rhs); + + if (this != &rhs) + *m_opaque_up = *rhs.m_opaque_up; + return *this; +} + +uint32_t SBAddressRangeList::GetSize() const { + LLDB_INSTRUMENT_VA(this); + + return 
m_opaque_up->GetSize(); +} + +SBAddressRange SBAddressRangeList::GetAddressRangeAtIndex(uint64_t idx) { + LLDB_INSTRUMENT_VA(this, idx); + + SBAddressRange sb_addr_range; + (*sb_addr_range.m_opaque_up) = m_opaque_up->GetAddressRangeAtIndex(idx); + return sb_addr_range; +} + +void SBAddressRangeList::Clear() { + LLDB_INSTRUMENT_VA(this); + + m_opaque_up->Clear(); +} + +void SBAddressRangeList::Append(const SBAddressRange &sb_addr_range) { + LLDB_INSTRUMENT_VA(this, sb_addr_range); + + m_opaque_up->Append(*sb_addr_range.m_opaque_up); +} + +void SBAddressRangeList::Append(const SBAddressRangeList &sb_addr_range_list) { + LLDB_INSTRUMENT_VA(this, sb_addr_range_list); + + m_opaque_up->Append(*sb_addr_range_list.m_opaque_up); +} + +bool SBAddressRangeList::GetDescription(SBStream &description, + const SBTarget &target) { + LLDB_INSTRUMENT_VA(this, description, target); + + const uint32_t num_ranges = GetSize(); + bool is_first = true; + Stream &stream = description.ref(); + stream << "["; + for (uint32_t i = 0; i < num_ranges; ++i) { + if (is_first) { + is_first = false; + } else { + stream.Printf(", "); + } + GetAddressRangeAtIndex(i).GetDescription(description, target); + } + stream << "]"; + return true; +} diff --git a/lldb/source/API/SBBlock.cpp b/lldb/source/API/SBBlock.cpp index 7d7565340836b..2577b14920f06 100644 --- a/lldb/source/API/SBBlock.cpp +++ b/lldb/source/API/SBBlock.cpp @@ -13,6 +13,7 @@ #include "lldb/API/SBStream.h" #include "lldb/API/SBValue.h" #include "lldb/Core/AddressRange.h" +#include "lldb/Core/AddressRangeListImpl.h" #include "lldb/Core/ValueObjectVariable.h" #include "lldb/Symbol/Block.h" #include "lldb/Symbol/Function.h" @@ -219,6 +220,15 @@ lldb::SBAddress SBBlock::GetRangeEndAddress(uint32_t idx) { return sb_addr; } +lldb::SBAddressRangeList SBBlock::GetRanges() { + LLDB_INSTRUMENT_VA(this); + + lldb::SBAddressRangeList sb_ranges; + if (m_opaque_ptr) + sb_ranges.m_opaque_up->ref() = m_opaque_ptr->GetRanges(); + return sb_ranges; +} + 
uint32_t SBBlock::GetRangeIndexForBlockAddress(lldb::SBAddress block_addr) { LLDB_INSTRUMENT_VA(this, block_addr); diff --git a/lldb/source/API/SBFunction.cpp b/lldb/source/API/SBFunction.cpp index a01c7f79bbd31..6a97352fc2c2f 100644 --- a/lldb/source/API/SBFunction.cpp +++ b/lldb/source/API/SBFunction.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBFunction.h" +#include "lldb/API/SBAddressRange.h" #include "lldb/API/SBProcess.h" #include "lldb/API/SBStream.h" #include "lldb/Core/Disassembler.h" @@ -160,6 +161,19 @@ SBAddress SBFunction::GetEndAddress() { return addr; } +lldb::SBAddressRangeList SBFunction::GetRanges() { + LLDB_INSTRUMENT_VA(this); + + lldb::SBAddressRangeList ranges; + if (m_opaque_ptr) { + lldb::SBAddressRange range; + (*range.m_opaque_up) = m_opaque_ptr->GetAddressRange(); + ranges.Append(std::move(range)); + } + + return ranges; +} + const char *SBFunction::GetArgumentName(uint32_t arg_idx) { LLDB_INSTRUMENT_VA(this, arg_idx); diff --git a/lldb/source/Core/AddressRange.cpp b/lldb/source/Core/AddressRange.cpp index 1830f2ccd47fe..6cef7e149cd20 100644 --- a/lldb/source/Core/AddressRange.cpp +++ b/lldb/source/Core/AddressRange.cpp @@ -14,6 +14,7 @@ #include "lldb/Utility/FileSpec.h" #include "lldb/Utility/Stream.h" #include "lldb/lldb-defines.h" +#include "lldb/lldb-types.h" #include "llvm/Support/Compiler.h" @@ -145,6 +146,10 @@ void AddressRange::Clear() { m_byte_size = 0; } +bool AddressRange::IsValid() const { + return m_base_addr.IsValid() && (m_byte_size > 0); +} + bool AddressRange::Dump(Stream *s, Target *target, Address::DumpStyle style, Address::DumpStyle fallback_style) const { addr_t vmaddr = LLDB_INVALID_ADDRESS; @@ -203,3 +208,41 @@ void AddressRange::DumpDebug(Stream *s) const { static_cast(m_base_addr.GetSection().get()), m_base_addr.GetOffset(), GetByteSize()); } + +bool AddressRange::GetDescription(Stream *s, Target *target) const { + addr_t start_addr = 
m_base_addr.GetLoadAddress(target); + if (start_addr != LLDB_INVALID_ADDRESS) { + // We have a valid target and the address was resolved, or we have a base + // address with no section. Just print out a raw address range: + // [0x<start>-0x<end>) + s->Printf("[0x%" PRIx64 "-0x%" PRIx64 ")", start_addr, + start_addr + GetByteSize()); + return true; + } + + // Either no target or the address wasn't resolved, print as + // <file name>[0x<start>-0x<end>) + const char *file_name = ""; + const auto section_sp = m_base_addr.GetSection(); + if (section_sp) { + if (const auto object_file = section_sp->GetObjectFile()) + file_name = object_file->GetFileSpec().GetFilename().AsCString(); + } + start_addr = m_base_addr.GetFileAddress(); + const addr_t end_addr = (start_addr == LLDB_INVALID_ADDRESS) + ? LLDB_INVALID_ADDRESS + : start_addr + GetByteSize(); + s->Printf("%s[0x%" PRIx64 "-0x%" PRIx64 ")", file_name, start_addr, end_addr); + return true; +} + +bool AddressRange::operator==(const AddressRange &rhs) { + if (!IsValid() || !rhs.IsValid()) + return false; + return m_base_addr == rhs.GetBaseAddress() && + m_byte_size == rhs.GetByteSize(); +} + +bool AddressRange::operator!=(const AddressRange &rhs) { + return !(*this == rhs); +} diff --git a/lldb/source/Core/AddressRangeListImpl.cpp b/lldb/source/Core/AddressRangeListImpl.cpp new file mode 100644 index 0000000000000..d405cf0fa3ec3 --- /dev/null +++ b/lldb/source/Core/AddressRangeListImpl.cpp @@ -0,0 +1,50 @@ +//===-- AddressRangeListImpl.cpp ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Core/AddressRangeListImpl.h" + +using namespace lldb; +using namespace lldb_private; + +AddressRangeListImpl::AddressRangeListImpl() : m_ranges() {} + +AddressRangeListImpl & +AddressRangeListImpl::operator=(const AddressRangeListImpl &rhs) { + if (this == &rhs) + return *this; + m_ranges = rhs.m_ranges; + return *this; +} + +size_t AddressRangeListImpl::GetSize() const { return m_ranges.size(); } + +void AddressRangeListImpl::Reserve(size_t capacity) { + m_ranges.reserve(capacity); +} + +void AddressRangeListImpl::Append(const AddressRange &sb_region) { + m_ranges.emplace_back(sb_region); +} + +void AddressRangeListImpl::Append(const AddressRangeListImpl &list) { + Reserve(GetSize() + list.GetSize()); + + for (const auto &range : list.m_ranges) + Append(range); +} + +void AddressRangeListImpl::Clear() { m_ranges.clear(); } + +lldb_private::AddressRange +AddressRangeListImpl::GetAddressRangeAtIndex(size_t index) { + if (index >= GetSize()) + return AddressRange(); + return m_ranges[index]; +} + +AddressRanges &AddressRangeListImpl::ref() { return m_ranges; } diff --git a/lldb/source/Core/CMakeLists.txt b/lldb/source/Core/CMakeLists.txt index f24dbbd45a8e8..dbc620b91b1ed 100644 --- a/lldb/source/Core/CMakeLists.txt +++ b/lldb/source/Core/CMakeLists.txt @@ -20,6 +20,7 @@ endif() add_lldb_library(lldbCore Address.cpp AddressRange.cpp + AddressRangeListImpl.cpp AddressResolver.cpp AddressResolverFileLine.cpp Communication.cpp diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp index ca582cb1d5a46..ddaa7a8a597b4 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp +++ 
b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp @@ -13,6 +13,8 @@ #include "lldb/Symbol/CompilerType.h" #include "lldb/Target/Process.h" #include "lldb/Target/Target.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/Log.h" #include "lldb/Utility/StringLexer.h" #include "clang/Basic/TargetInfo.h" @@ -234,12 +236,15 @@ clang::QualType AppleObjCTypeEncodingParser::BuildObjCObjectPointerType( auto types = decl_vendor->FindTypes(ConstString(name), /*max_matches*/ 1); - // The user can forward-declare something that has no definition. The runtime - // doesn't prohibit this at all. This is a rare and very weird case. We keep - // this assert in debug builds so we catch other weird cases. - lldbassert(!types.empty()); - if (types.empty()) + if (types.empty()) { + // The user can forward-declare something that has no definition. The + // runtime doesn't prohibit this at all. This is a rare and very weird + // case. Assert in debug builds so we catch other weird cases. 
+ assert(false && "forward declaration without definition"); + LLDB_LOG(GetLog(LLDBLog::Types), + "forward declaration without definition: {0}", name); return ast_ctx.getObjCIdType(); + } return ClangUtil::GetQualType(types.front().GetPointerType()); } else { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp index 33537df4f5076..1703597a7cd2f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp @@ -284,8 +284,12 @@ void AppleDWARFIndex::GetFunctions( for (const auto &entry : m_apple_names_up->equal_range(name)) { DIERef die_ref(std::nullopt, DIERef::Section::DebugInfo, *entry.getDIESectionOffset()); - if (!ProcessFunctionDIE(lookup_info, die_ref, dwarf, parent_decl_ctx, - callback)) + DWARFDIE die = dwarf.GetDIE(die_ref); + if (!die) { + ReportInvalidDIERef(die_ref, name); + continue; + } + if (!ProcessFunctionDIE(lookup_info, die, parent_decl_ctx, callback)) return; } } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h index 66db396279e06..e144cf0f9bd94 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h @@ -60,6 +60,8 @@ class DWARFASTParser { virtual ConstString GetDIEClassTemplateParams(const DWARFDIE &die) = 0; + virtual lldb_private::Type *FindDefinitionTypeForDIE(const DWARFDIE &die) = 0; + static std::optional ParseChildArrayInfo(const DWARFDIE &parent_die, const ExecutionContext *exe_ctx = nullptr); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index f8101aba5c627..e0b1b430b266f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -154,6 +154,26 @@ static bool 
TagIsRecordType(dw_tag_t tag) { } } +static bool IsForwardDeclaration(const DWARFDIE &die, + const ParsedDWARFTypeAttributes &attrs, + LanguageType cu_language) { + if (attrs.is_forward_declaration) + return true; + + // Work around an issue with clang at the moment where forward + // declarations for objective C classes are emitted as: + // DW_TAG_structure_type [2] + // DW_AT_name( "ForwardObjcClass" ) + // DW_AT_byte_size( 0x00 ) + // DW_AT_decl_file( "..." ) + // DW_AT_decl_line( 1 ) + // + // Note that there is no DW_AT_declaration and there are no children, + // and the byte size is zero. + return attrs.byte_size && *attrs.byte_size == 0 && attrs.name && + !die.HasChildren() && cu_language == eLanguageTypeObjC; +} + TypeSP DWARFASTParserClang::ParseTypeFromClangModule(const SymbolContext &sc, const DWARFDIE &die, Log *log) { @@ -249,11 +269,9 @@ static void ForcefullyCompleteType(CompilerType type) { /// This function serves a similar purpose as RequireCompleteType above, but it /// avoids completing the type if it is not immediately necessary. It only /// ensures we _can_ complete the type later. -static void PrepareContextToReceiveMembers(TypeSystemClang &ast, - ClangASTImporter &ast_importer, - clang::DeclContext *decl_ctx, - DWARFDIE die, - const char *type_name_cstr) { +void DWARFASTParserClang::PrepareContextToReceiveMembers( + clang::DeclContext *decl_ctx, const DWARFDIE &decl_ctx_die, + const DWARFDIE &die, const char *type_name_cstr) { auto *tag_decl_ctx = clang::dyn_cast(decl_ctx); if (!tag_decl_ctx) return; // Non-tag context are always ready. @@ -268,7 +286,8 @@ static void PrepareContextToReceiveMembers(TypeSystemClang &ast, // gmodules case), we can complete the type by doing a full import. // If this type was not imported from an external AST, there's nothing to do. 
- CompilerType type = ast.GetTypeForDecl(tag_decl_ctx); + CompilerType type = m_ast.GetTypeForDecl(tag_decl_ctx); + ClangASTImporter &ast_importer = GetClangASTImporter(); if (type && ast_importer.CanImport(type)) { auto qual_type = ClangUtil::GetQualType(type); if (ast_importer.RequireCompleteType(qual_type)) @@ -279,6 +298,13 @@ static void PrepareContextToReceiveMembers(TypeSystemClang &ast, type_name_cstr ? type_name_cstr : "", die.GetOffset()); } + // By searching for the definition DIE of the decl_ctx type, we will either: + // 1. Find the definition DIE and start its definition with + // TypeSystemClang::StartTagDeclarationDefinition. + // 2. Fail to find it, and then need to forcefully complete it. + FindDefinitionTypeForDIE(decl_ctx_die); + if (tag_decl_ctx->isCompleteDefinition() || tag_decl_ctx->isBeingDefined()) + return; // We don't have a type definition and/or the import failed. We must // forcefully complete the type to avoid crashes. ForcefullyCompleteType(type); @@ -620,10 +646,11 @@ DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc, if (tag == DW_TAG_typedef) { // DeclContext will be populated when the clang type is materialized in // Type::ResolveCompilerType. 
- PrepareContextToReceiveMembers( - m_ast, GetClangASTImporter(), - GetClangDeclContextContainingDIE(die, nullptr), die, - attrs.name.GetCString()); + DWARFDIE decl_ctx_die; + clang::DeclContext *decl_ctx = + GetClangDeclContextContainingDIE(die, &decl_ctx_die); + PrepareContextToReceiveMembers(decl_ctx, decl_ctx_die, die, + attrs.name.GetCString()); if (attrs.type.IsValid()) { // Try to parse a typedef from the (DWARF embedded in the) Clang @@ -1103,32 +1130,6 @@ DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, // struct and see if this is actually a C++ method Type *class_type = dwarf->ResolveType(decl_ctx_die); if (class_type) { - if (class_type->GetID() != decl_ctx_die.GetID() || - IsClangModuleFwdDecl(decl_ctx_die)) { - - // We uniqued the parent class of this function to another - // class so we now need to associate all dies under - // "decl_ctx_die" to DIEs in the DIE for "class_type"... - DWARFDIE class_type_die = dwarf->GetDIE(class_type->GetID()); - - if (class_type_die) { - std::vector failures; - - CopyUniqueClassMethodTypes(decl_ctx_die, class_type_die, - class_type, failures); - - // FIXME do something with these failures that's - // smarter than just dropping them on the ground. - // Unfortunately classes don't like having stuff added - // to them after their definitions are complete... 
- + Type *type_ptr = dwarf->GetDIEToType()[die.GetDIE()]; - if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) { - return type_ptr->shared_from_this(); - } - } - } - if (attrs.specification.IsValid()) { // We have a specification which we are going to base our // function prototype off of, so we need this type to be @@ -1263,6 +1264,39 @@ DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, } } } + // By here, we should have already completed the c++ class_type + // because if either specification or abstract_origin is present, we + // call GetClangDeclContextForDIE to resolve the DW_TAG_subprogram + // referred by this one until we reached the DW_TAG_subprogram without + // specification or abstract_origin (the else branch above). Then the + // above GetFullCompilerType() will complete the class_type if it's + // not completed yet. After that, we will have the mapping from DIEs + // in class_type_die to DeclContexts in m_die_to_decl_ctx. + if (class_type->GetID() != decl_ctx_die.GetID() || + IsClangModuleFwdDecl(decl_ctx_die)) { + + // We uniqued the parent class of this function to another + // class so we now need to associate all dies under + // "decl_ctx_die" to DIEs in the DIE for "class_type"... + DWARFDIE class_type_die = dwarf->GetDIE(class_type->GetID()); + + if (class_type_die) { + std::vector failures; + + CopyUniqueClassMethodTypes(decl_ctx_die, class_type_die, + class_type, failures); + + // FIXME do something with these failures that's + // smarter than just dropping them on the ground. + // Unfortunately classes don't like having stuff added + // to them after their definitions are complete... 
+ + Type *type_ptr = dwarf->GetDIEToType()[die.GetDIE()]; + if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) { + return type_ptr->shared_from_this(); + } + } + } } } } @@ -1635,6 +1669,93 @@ DWARFASTParserClang::GetCPlusPlusQualifiedName(const DWARFDIE &die) { return qualified_name; } +lldb_private::Type * +DWARFASTParserClang::FindDefinitionTypeForDIE(const DWARFDIE &die) { + SymbolFileDWARF *dwarf = die.GetDWARF(); + ParsedDWARFTypeAttributes attrs(die); + bool is_forward_declaration = IsForwardDeclaration( + die, attrs, SymbolFileDWARF::GetLanguage(*die.GetCU())); + if (!is_forward_declaration) + return dwarf->GetDIEToType()[die.GetDIE()]; + + const dw_tag_t tag = die.Tag(); + TypeSP type_sp; + Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups); + if (log) { + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF({0:p}) - {1:x16}: {2} type \"{3}\" is a " + "forward declaration DIE, trying to find definition DIE", + static_cast(this), die.GetOffset(), DW_TAG_value_to_name(tag), + attrs.name.GetCString()); + } + // We haven't parse definition die for this type, starting to search for it. + // After we found the definition die, the GetDeclarationDIEToDefinitionDIE() + // map will have the new mapping from this declaration die to definition die. 
+ if (attrs.class_language == eLanguageTypeObjC || + attrs.class_language == eLanguageTypeObjC_plus_plus) { + if (!attrs.is_complete_objc_class && + die.Supports_DW_AT_APPLE_objc_complete_type()) { + // We have a valid eSymbolTypeObjCClass class symbol whose name + // matches the current objective C class that we are trying to find + // and this DIE isn't the complete definition (we checked + // is_complete_objc_class above and know it is false), so the real + // definition is in here somewhere + type_sp = + dwarf->FindCompleteObjCDefinitionTypeForDIE(die, attrs.name, true); + + if (!type_sp) { + SymbolFileDWARFDebugMap *debug_map_symfile = + dwarf->GetDebugMapSymfile(); + if (debug_map_symfile) { + // We weren't able to find a full declaration in this DWARF, + // see if we have a declaration anywhere else... + type_sp = debug_map_symfile->FindCompleteObjCDefinitionTypeForDIE( + die, attrs.name, true); + } + } + + if (type_sp && log) { + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF({0:p}) - {1:x16}: {2} ({3}) type \"{4}\" is an " + "incomplete objc type, complete type is {5:x8}", + static_cast(this), die.GetOffset(), + DW_TAG_value_to_name(tag), tag, attrs.name.GetCString(), + type_sp->GetID()); + } + } + } + + type_sp = dwarf->FindDefinitionTypeForDWARFDeclContext(die); + if (!type_sp) { + SymbolFileDWARFDebugMap *debug_map_symfile = dwarf->GetDebugMapSymfile(); + if (debug_map_symfile) { + // We weren't able to find a full declaration in this DWARF, see + // if we have a declaration anywhere else... 
+ type_sp = debug_map_symfile->FindDefinitionTypeForDWARFDeclContext(die); + } + if (type_sp && log) { + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF({0:p}) - {1:x16}: {2} type \"{3}\" is a " + "forward declaration, complete type is {4:x8}", + static_cast(this), die.GetOffset(), DW_TAG_value_to_name(tag), + attrs.name.GetCString(), type_sp->GetID()); + } + } + + if (!type_sp && log) { + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF({0:p}) - {1:x16}: {2} type \"{3}\" is a " + "forward declaration, unable to find definition DIE for it", + static_cast(this), die.GetOffset(), DW_TAG_value_to_name(tag), + attrs.name.GetCString()); + } + return type_sp.get(); +} + TypeSP DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, const DWARFDIE &die, @@ -1646,14 +1767,10 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, LanguageType cu_language = SymbolFileDWARF::GetLanguage(*die.GetCU()); Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups); - // UniqueDWARFASTType is large, so don't create a local variables on the - // stack, put it on the heap. This function is often called recursively and - // clang isn't good at sharing the stack space for variables in different - // blocks. 
- auto unique_ast_entry_up = std::make_unique(); - ConstString unique_typename(attrs.name); Declaration unique_decl(attrs.decl); + uint64_t byte_size = attrs.byte_size.value_or(0); + attrs.is_forward_declaration = IsForwardDeclaration(die, attrs, cu_language); if (attrs.name) { if (Language::LanguageIsCPlusPlus(cu_language)) { @@ -1666,14 +1783,42 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, unique_decl.Clear(); } - if (dwarf->GetUniqueDWARFASTTypeMap().Find( - unique_typename, die, unique_decl, attrs.byte_size.value_or(-1), - *unique_ast_entry_up)) { - type_sp = unique_ast_entry_up->m_type_sp; + if (UniqueDWARFASTType *unique_ast_entry_type = + dwarf->GetUniqueDWARFASTTypeMap().Find( + unique_typename, die, unique_decl, byte_size, + attrs.is_forward_declaration)) { + type_sp = unique_ast_entry_type->m_type_sp; if (type_sp) { dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); LinkDeclContextToDIE( - GetCachedClangDeclContextForDIE(unique_ast_entry_up->m_die), die); + GetCachedClangDeclContextForDIE(unique_ast_entry_type->m_die), die); + if (!attrs.is_forward_declaration) { + // If the DIE being parsed in this function is a definition and the + // entry in the map is a declaration, then we need to update the entry + // to point to the definition DIE. + if (unique_ast_entry_type->m_is_forward_declaration) { + unique_ast_entry_type->m_die = die; + unique_ast_entry_type->m_byte_size = byte_size; + unique_ast_entry_type->m_declaration = unique_decl; + unique_ast_entry_type->m_is_forward_declaration = false; + // Need to update Type ID to refer to the definition DIE. because + // it's used in ParseSubroutine to determine if we need to copy cxx + // method types from a declaration DIE to this definition DIE. 
+ type_sp->SetID(die.GetID()); + clang_type = type_sp->GetForwardCompilerType(); + if (attrs.class_language != eLanguageTypeObjC && + attrs.class_language != eLanguageTypeObjC_plus_plus) + TypeSystemClang::StartTagDeclarationDefinition(clang_type); + + CompilerType compiler_type_no_qualifiers = + ClangUtil::RemoveFastQualifiers(clang_type); + auto result = dwarf->GetForwardDeclCompilerTypeToDIE().try_emplace( + compiler_type_no_qualifiers.GetOpaqueQualType(), + *die.GetDIERef()); + if (!result.second) + result.first->second = *die.GetDIERef(); + } + } return type_sp; } } @@ -1695,125 +1840,21 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, default_accessibility = eAccessPrivate; } - if (attrs.byte_size && *attrs.byte_size == 0 && attrs.name && - !die.HasChildren() && cu_language == eLanguageTypeObjC) { - // Work around an issue with clang at the moment where forward - // declarations for objective C classes are emitted as: - // DW_TAG_structure_type [2] - // DW_AT_name( "ForwardObjcClass" ) - // DW_AT_byte_size( 0x00 ) - // DW_AT_decl_file( "..." ) - // DW_AT_decl_line( 1 ) - // - // Note that there is no DW_AT_declaration and there are no children, - // and the byte size is zero. 
- attrs.is_forward_declaration = true; - } - - if (attrs.class_language == eLanguageTypeObjC || - attrs.class_language == eLanguageTypeObjC_plus_plus) { - if (!attrs.is_complete_objc_class && - die.Supports_DW_AT_APPLE_objc_complete_type()) { - // We have a valid eSymbolTypeObjCClass class symbol whose name - // matches the current objective C class that we are trying to find - // and this DIE isn't the complete definition (we checked - // is_complete_objc_class above and know it is false), so the real - // definition is in here somewhere - type_sp = - dwarf->FindCompleteObjCDefinitionTypeForDIE(die, attrs.name, true); - - if (!type_sp) { - SymbolFileDWARFDebugMap *debug_map_symfile = - dwarf->GetDebugMapSymfile(); - if (debug_map_symfile) { - // We weren't able to find a full declaration in this DWARF, - // see if we have a declaration anywhere else... - type_sp = debug_map_symfile->FindCompleteObjCDefinitionTypeForDIE( - die, attrs.name, true); - } - } - - if (type_sp) { - if (log) { - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF({0:p}) - {1:x16}: {2} ({3}) type \"{4}\" is an " - "incomplete objc type, complete type is {5:x8}", - static_cast(this), die.GetOffset(), - DW_TAG_value_to_name(tag), tag, attrs.name.GetCString(), - type_sp->GetID()); - } - - // We found a real definition for this type elsewhere so lets use - // it and cache the fact that we found a complete type for this - // die - dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); - return type_sp; - } - } - } - if (attrs.is_forward_declaration) { - // We have a forward declaration to a type and we need to try and - // find a full declaration. We look in the current type index just in - // case we have a forward declaration followed by an actual - // declarations in the DWARF. If this fails, we need to look - // elsewhere... 
- if (log) { - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF({0:p}) - {1:x16}: {2} ({3}) type \"{4}\" is a " - "forward declaration, trying to find complete type", - static_cast(this), die.GetOffset(), DW_TAG_value_to_name(tag), - tag, attrs.name.GetCString()); - } - // See if the type comes from a Clang module and if so, track down // that type. type_sp = ParseTypeFromClangModule(sc, die, log); if (type_sp) return type_sp; - - // type_sp = FindDefinitionTypeForDIE (dwarf_cu, die, - // type_name_const_str); - type_sp = dwarf->FindDefinitionTypeForDWARFDeclContext(die); - - if (!type_sp) { - SymbolFileDWARFDebugMap *debug_map_symfile = dwarf->GetDebugMapSymfile(); - if (debug_map_symfile) { - // We weren't able to find a full declaration in this DWARF, see - // if we have a declaration anywhere else... - type_sp = debug_map_symfile->FindDefinitionTypeForDWARFDeclContext(die); - } - } - - if (type_sp) { - if (log) { - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF({0:p}) - {1:x16}: {2} ({3}) type \"{4}\" is a " - "forward declaration, complete type is {5:x8}", - static_cast(this), die.GetOffset(), - DW_TAG_value_to_name(tag), tag, attrs.name.GetCString(), - type_sp->GetID()); - } - - // We found a real definition for this type elsewhere so lets use - // it and cache the fact that we found a complete type for this die - dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); - clang::DeclContext *defn_decl_ctx = - GetCachedClangDeclContextForDIE(dwarf->GetDIE(type_sp->GetID())); - if (defn_decl_ctx) - LinkDeclContextToDIE(defn_decl_ctx, die); - return type_sp; - } } + assert(tag_decl_kind != -1); UNUSED_IF_ASSERT_DISABLED(tag_decl_kind); - bool clang_type_was_created = false; - clang::DeclContext *decl_ctx = GetClangDeclContextContainingDIE(die, nullptr); + DWARFDIE decl_ctx_die; + clang::DeclContext *decl_ctx = + GetClangDeclContextContainingDIE(die, &decl_ctx_die); - PrepareContextToReceiveMembers(m_ast, 
GetClangASTImporter(), decl_ctx, die, + PrepareContextToReceiveMembers(decl_ctx, decl_ctx_die, die, attrs.name.GetCString()); if (attrs.accessibility == eAccessNone && decl_ctx) { @@ -1852,20 +1893,17 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, tag_decl_kind, template_param_infos); clang_type = m_ast.CreateClassTemplateSpecializationType(class_specialization_decl); - clang_type_was_created = true; m_ast.SetMetadata(class_template_decl, metadata); m_ast.SetMetadata(class_specialization_decl, metadata); } - if (!clang_type_was_created) { - clang_type_was_created = true; + if (!clang_type) { clang_type = m_ast.CreateRecordType( decl_ctx, GetOwningClangModule(die), attrs.accessibility, attrs.name.GetCString(), tag_decl_kind, attrs.class_language, &metadata, attrs.exports_symbols); } - // Store a forward declaration to this class type in case any // parameters in any class methods need it for the clang types for // function prototypes. @@ -1876,13 +1914,19 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, Type::ResolveState::Forward, TypePayloadClang(OptionalClangModuleID(), attrs.is_complete_objc_class)); + // UniqueDWARFASTType is large, so don't create a local variables on the + // stack, put it on the heap. This function is often called recursively and + // clang isn't good at sharing the stack space for variables in different + // blocks. 
+ auto unique_ast_entry_up = std::make_unique(); // Add our type to the unique type map so we don't end up creating many // copies of the same type over and over in the ASTContext for our // module unique_ast_entry_up->m_type_sp = type_sp; unique_ast_entry_up->m_die = die; unique_ast_entry_up->m_declaration = unique_decl; - unique_ast_entry_up->m_byte_size = attrs.byte_size.value_or(0); + unique_ast_entry_up->m_byte_size = byte_size; + unique_ast_entry_up->m_is_forward_declaration = attrs.is_forward_declaration; dwarf->GetUniqueDWARFASTTypeMap().Insert(unique_typename, *unique_ast_entry_up); @@ -1923,7 +1967,7 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, GetClangASTImporter().SetRecordLayout(record_decl, layout); } } - } else if (clang_type_was_created) { + } else { // Start the definition if the class is not objective C since the // underlying decls respond to isCompleteDefinition(). Objective // C decls don't respond to isCompleteDefinition() so we can't @@ -1935,26 +1979,21 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, if (attrs.class_language != eLanguageTypeObjC && attrs.class_language != eLanguageTypeObjC_plus_plus) TypeSystemClang::StartTagDeclarationDefinition(clang_type); - - // Leave this as a forward declaration until we need to know the - // details of the type. lldb_private::Type will automatically call - // the SymbolFile virtual function - // "SymbolFileDWARF::CompleteType(Type *)" When the definition - // needs to be defined. - assert(!dwarf->GetForwardDeclCompilerTypeToDIE().count( - ClangUtil::RemoveFastQualifiers(clang_type) - .GetOpaqueQualType()) && - "Type already in the forward declaration map!"); - // Can't assume m_ast.GetSymbolFile() is actually a - // SymbolFileDWARF, it can be a SymbolFileDWARFDebugMap for Apple - // binaries. 
- dwarf->GetForwardDeclCompilerTypeToDIE().try_emplace( - ClangUtil::RemoveFastQualifiers(clang_type).GetOpaqueQualType(), - *die.GetDIERef()); - m_ast.SetHasExternalStorage(clang_type.GetOpaqueQualType(), true); } } + // If this is a declaration DIE, leave this as a forward declaration until we + // need to know the details of the type. lldb_private::Type will automatically + // call the SymbolFile virtual function "SymbolFileDWARF::CompleteType(Type + // *)" When the definition needs to be defined. + assert(!dwarf->GetForwardDeclCompilerTypeToDIE().count( + ClangUtil::RemoveFastQualifiers(clang_type).GetOpaqueQualType()) && + "Type already in the forward declaration map!"); + dwarf->GetForwardDeclCompilerTypeToDIE().try_emplace( + ClangUtil::RemoveFastQualifiers(clang_type).GetOpaqueQualType(), + *die.GetDIERef()); + m_ast.SetHasExternalStorage(clang_type.GetOpaqueQualType(), true); + // If we made a clang type, set the trivial abi if applicable: We only // do this for pass by value - which implies the Trivial ABI. There // isn't a way to assert that something that would normally be pass by diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index 8d4af203bb287..853b8ccc30369 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -42,40 +42,40 @@ struct ParsedDWARFTypeAttributes; class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { public: + typedef lldb_private::plugin::dwarf::DWARFDIE DWARFDIE; + DWARFASTParserClang(lldb_private::TypeSystemClang &ast); ~DWARFASTParserClang() override; // DWARFASTParser interface. 
- lldb::TypeSP - ParseTypeFromDWARF(const lldb_private::SymbolContext &sc, - const lldb_private::plugin::dwarf::DWARFDIE &die, - bool *type_is_new_ptr) override; + lldb::TypeSP ParseTypeFromDWARF(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, + bool *type_is_new_ptr) override; - lldb_private::ConstString ConstructDemangledNameFromDWARF( - const lldb_private::plugin::dwarf::DWARFDIE &die) override; + lldb_private::ConstString + ConstructDemangledNameFromDWARF(const DWARFDIE &die) override; lldb_private::Function * ParseFunctionFromDWARF(lldb_private::CompileUnit &comp_unit, - const lldb_private::plugin::dwarf::DWARFDIE &die, + const DWARFDIE &die, const lldb_private::AddressRange &func_range) override; bool - CompleteTypeFromDWARF(const lldb_private::plugin::dwarf::DWARFDIE &die, - lldb_private::Type *type, + CompleteTypeFromDWARF(const DWARFDIE &die, lldb_private::Type *type, lldb_private::CompilerType &compiler_type) override; - lldb_private::CompilerDecl GetDeclForUIDFromDWARF( - const lldb_private::plugin::dwarf::DWARFDIE &die) override; + lldb_private::CompilerDecl + GetDeclForUIDFromDWARF(const DWARFDIE &die) override; void EnsureAllDIEsInDeclContextHaveBeenParsed( lldb_private::CompilerDeclContext decl_context) override; - lldb_private::CompilerDeclContext GetDeclContextForUIDFromDWARF( - const lldb_private::plugin::dwarf::DWARFDIE &die) override; + lldb_private::CompilerDeclContext + GetDeclContextForUIDFromDWARF(const DWARFDIE &die) override; - lldb_private::CompilerDeclContext GetDeclContextContainingUIDFromDWARF( - const lldb_private::plugin::dwarf::DWARFDIE &die) override; + lldb_private::CompilerDeclContext + GetDeclContextContainingUIDFromDWARF(const DWARFDIE &die) override; lldb_private::ClangASTImporter &GetClangASTImporter(); @@ -105,8 +105,13 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { /// \return A string, including surrounding '<>', of the template parameters. 
/// If the DIE's name already has '<>', returns an empty ConstString because /// it's assumed that the caller is using the DIE name anyway. - lldb_private::ConstString GetDIEClassTemplateParams( - const lldb_private::plugin::dwarf::DWARFDIE &die) override; + lldb_private::ConstString + GetDIEClassTemplateParams(const DWARFDIE &die) override; + + // Searching for definition DIE for the given DIE and return the type + // associated with the definition DIE, or nullptr if definition DIE is not + // found. + lldb_private::Type *FindDefinitionTypeForDIE(const DWARFDIE &die) override; protected: /// Protected typedefs and members. @@ -118,8 +123,7 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { const lldb_private::plugin::dwarf::DWARFDebugInfoEntry *, clang::DeclContext *> DIEToDeclContextMap; - typedef std::multimap + typedef std::multimap DeclContextToDIEMap; typedef llvm::DenseMap< const lldb_private::plugin::dwarf::DWARFDebugInfoEntry *, @@ -137,14 +141,11 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { std::unique_ptr m_clang_ast_importer_up; /// @} - clang::DeclContext * - GetDeclContextForBlock(const lldb_private::plugin::dwarf::DWARFDIE &die); + clang::DeclContext *GetDeclContextForBlock(const DWARFDIE &die); - clang::BlockDecl * - ResolveBlockDIE(const lldb_private::plugin::dwarf::DWARFDIE &die); + clang::BlockDecl *ResolveBlockDIE(const DWARFDIE &die); - clang::NamespaceDecl * - ResolveNamespaceDIE(const lldb_private::plugin::dwarf::DWARFDIE &die); + clang::NamespaceDecl *ResolveNamespaceDIE(const DWARFDIE &die); /// Returns the namespace decl that a DW_TAG_imported_declaration imports. /// @@ -155,96 +156,86 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { /// 'die' imports. If the imported entity is not a namespace /// or another import declaration, returns nullptr. If an error /// occurs, returns nullptr. 
- clang::NamespaceDecl *ResolveImportedDeclarationDIE( - const lldb_private::plugin::dwarf::DWARFDIE &die); + clang::NamespaceDecl *ResolveImportedDeclarationDIE(const DWARFDIE &die); - bool ParseTemplateDIE(const lldb_private::plugin::dwarf::DWARFDIE &die, + bool ParseTemplateDIE(const DWARFDIE &die, lldb_private::TypeSystemClang::TemplateParameterInfos &template_param_infos); bool ParseTemplateParameterInfos( - const lldb_private::plugin::dwarf::DWARFDIE &parent_die, + const DWARFDIE &parent_die, lldb_private::TypeSystemClang::TemplateParameterInfos &template_param_infos); - std::string - GetCPlusPlusQualifiedName(const lldb_private::plugin::dwarf::DWARFDIE &die); + std::string GetCPlusPlusQualifiedName(const DWARFDIE &die); bool ParseChildMembers( - const lldb_private::plugin::dwarf::DWARFDIE &die, - lldb_private::CompilerType &class_compiler_type, + const DWARFDIE &die, lldb_private::CompilerType &class_compiler_type, std::vector> &base_classes, - std::vector &member_function_dies, - std::vector &contained_type_dies, + std::vector &member_function_dies, + std::vector &contained_type_dies, DelayedPropertyList &delayed_properties, const lldb::AccessType default_accessibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info); size_t ParseChildParameters(clang::DeclContext *containing_decl_ctx, - const lldb_private::plugin::dwarf::DWARFDIE &parent_die, - bool skip_artificial, bool &is_static, bool &is_variadic, + const DWARFDIE &parent_die, bool skip_artificial, + bool &is_static, bool &is_variadic, bool &has_template_params, std::vector &function_args, std::vector &function_param_decls, unsigned &type_quals); - size_t ParseChildEnumerators( - lldb_private::CompilerType &compiler_type, bool is_signed, - uint32_t enumerator_byte_size, - const lldb_private::plugin::dwarf::DWARFDIE &parent_die); + size_t ParseChildEnumerators(lldb_private::CompilerType &compiler_type, + bool is_signed, uint32_t enumerator_byte_size, + const DWARFDIE &parent_die); /// Parse a 
structure, class, or union type DIE. - lldb::TypeSP - ParseStructureLikeDIE(const lldb_private::SymbolContext &sc, - const lldb_private::plugin::dwarf::DWARFDIE &die, - ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP ParseStructureLikeDIE(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); - clang::Decl * - GetClangDeclForDIE(const lldb_private::plugin::dwarf::DWARFDIE &die); + clang::Decl *GetClangDeclForDIE(const DWARFDIE &die); - clang::DeclContext * - GetClangDeclContextForDIE(const lldb_private::plugin::dwarf::DWARFDIE &die); + clang::DeclContext *GetClangDeclContextForDIE(const DWARFDIE &die); - clang::DeclContext *GetClangDeclContextContainingDIE( - const lldb_private::plugin::dwarf::DWARFDIE &die, - lldb_private::plugin::dwarf::DWARFDIE *decl_ctx_die); - lldb_private::OptionalClangModuleID - GetOwningClangModule(const lldb_private::plugin::dwarf::DWARFDIE &die); + clang::DeclContext *GetClangDeclContextContainingDIE(const DWARFDIE &die, + DWARFDIE *decl_ctx_die); + lldb_private::OptionalClangModuleID GetOwningClangModule(const DWARFDIE &die); - bool CopyUniqueClassMethodTypes( - const lldb_private::plugin::dwarf::DWARFDIE &src_class_die, - const lldb_private::plugin::dwarf::DWARFDIE &dst_class_die, - lldb_private::Type *class_type, - std::vector &failures); + bool CopyUniqueClassMethodTypes(const DWARFDIE &src_class_die, + const DWARFDIE &dst_class_die, + lldb_private::Type *class_type, + std::vector &failures); - clang::DeclContext *GetCachedClangDeclContextForDIE( - const lldb_private::plugin::dwarf::DWARFDIE &die); + clang::DeclContext *GetCachedClangDeclContextForDIE(const DWARFDIE &die); - void LinkDeclContextToDIE(clang::DeclContext *decl_ctx, - const lldb_private::plugin::dwarf::DWARFDIE &die); + void LinkDeclContextToDIE(clang::DeclContext *decl_ctx, const DWARFDIE &die); - void LinkDeclToDIE(clang::Decl *decl, - const lldb_private::plugin::dwarf::DWARFDIE &die); + void LinkDeclToDIE(clang::Decl 
*decl, const DWARFDIE &die); /// If \p type_sp is valid, calculate and set its symbol context scope, and /// update the type list for its backing symbol file. /// /// Returns \p type_sp. - lldb::TypeSP UpdateSymbolContextScopeForType( - const lldb_private::SymbolContext &sc, - const lldb_private::plugin::dwarf::DWARFDIE &die, lldb::TypeSP type_sp); + lldb::TypeSP + UpdateSymbolContextScopeForType(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, lldb::TypeSP type_sp); /// Follow Clang Module Skeleton CU references to find a type definition. - lldb::TypeSP - ParseTypeFromClangModule(const lldb_private::SymbolContext &sc, - const lldb_private::plugin::dwarf::DWARFDIE &die, - lldb_private::Log *log); + lldb::TypeSP ParseTypeFromClangModule(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, + lldb_private::Log *log); // Return true if this type is a declaration to a type in an external // module. - lldb::ModuleSP - GetModuleForType(const lldb_private::plugin::dwarf::DWARFDIE &die); + lldb::ModuleSP GetModuleForType(const DWARFDIE &die); + + void PrepareContextToReceiveMembers(clang::DeclContext *decl_ctx, + const DWARFDIE &decl_ctx_die, + const DWARFDIE &die, + const char *type_name_cstr); static bool classof(const DWARFASTParser *Parser) { return Parser->GetKind() == Kind::DWARFASTParserClang; @@ -274,10 +265,8 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { /// Parsed form of all attributes that are relevant for parsing type members. struct MemberAttributes { - explicit MemberAttributes( - const lldb_private::plugin::dwarf::DWARFDIE &die, - const lldb_private::plugin::dwarf::DWARFDIE &parent_die, - lldb::ModuleSP module_sp); + explicit MemberAttributes(const DWARFDIE &die, const DWARFDIE &parent_die, + lldb::ModuleSP module_sp); const char *name = nullptr; /// Indicates how many bits into the word (according to the host endianness) /// the low-order bit of the field starts. Can be negative. 
@@ -324,15 +313,12 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { /// created property. /// \param delayed_properties The list of delayed properties that the result /// will be appended to. - void - ParseObjCProperty(const lldb_private::plugin::dwarf::DWARFDIE &die, - const lldb_private::plugin::dwarf::DWARFDIE &parent_die, - const lldb_private::CompilerType &class_clang_type, - DelayedPropertyList &delayed_properties); + void ParseObjCProperty(const DWARFDIE &die, const DWARFDIE &parent_die, + const lldb_private::CompilerType &class_clang_type, + DelayedPropertyList &delayed_properties); void - ParseSingleMember(const lldb_private::plugin::dwarf::DWARFDIE &die, - const lldb_private::plugin::dwarf::DWARFDIE &parent_die, + ParseSingleMember(const DWARFDIE &die, const DWARFDIE &parent_die, const lldb_private::CompilerType &class_clang_type, lldb::AccessType default_accessibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info, @@ -350,31 +336,25 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { /// \param[in] class_clang_type The parent RecordType of the static /// member this function will create. 
void CreateStaticMemberVariable( - const lldb_private::plugin::dwarf::DWARFDIE &die, - const MemberAttributes &attrs, + const DWARFDIE &die, const MemberAttributes &attrs, const lldb_private::CompilerType &class_clang_type); - bool CompleteRecordType(const lldb_private::plugin::dwarf::DWARFDIE &die, - lldb_private::Type *type, + bool CompleteRecordType(const DWARFDIE &die, lldb_private::Type *type, lldb_private::CompilerType &clang_type); - bool CompleteEnumType(const lldb_private::plugin::dwarf::DWARFDIE &die, - lldb_private::Type *type, + bool CompleteEnumType(const DWARFDIE &die, lldb_private::Type *type, lldb_private::CompilerType &clang_type); - lldb::TypeSP - ParseTypeModifier(const lldb_private::SymbolContext &sc, - const lldb_private::plugin::dwarf::DWARFDIE &die, - ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP ParseTypeModifier(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); lldb::TypeSP ParseEnum(const lldb_private::SymbolContext &sc, - const lldb_private::plugin::dwarf::DWARFDIE &die, - ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP ParseSubroutine(const lldb_private::plugin::dwarf::DWARFDIE &die, + const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP ParseSubroutine(const DWARFDIE &die, const ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP ParseArrayType(const lldb_private::plugin::dwarf::DWARFDIE &die, + lldb::TypeSP ParseArrayType(const DWARFDIE &die, const ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP - ParsePointerToMemberType(const lldb_private::plugin::dwarf::DWARFDIE &die, - const ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP ParsePointerToMemberType(const DWARFDIE &die, + const ParsedDWARFTypeAttributes &attrs); /// Parses a DW_TAG_inheritance DIE into a base/super class. 
/// @@ -391,8 +371,7 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { /// \param layout_info The layout information that will be updated for C++ /// base classes with the base offset. void ParseInheritance( - const lldb_private::plugin::dwarf::DWARFDIE &die, - const lldb_private::plugin::dwarf::DWARFDIE &parent_die, + const DWARFDIE &die, const DWARFDIE &parent_die, const lldb_private::CompilerType class_clang_type, const lldb::AccessType default_accessibility, const lldb::ModuleSP &module_sp, @@ -409,8 +388,7 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { /// \param layout_info The layout information that will be updated for // base classes with the base offset void - ParseRustVariantPart(lldb_private::plugin::dwarf::DWARFDIE &die, - const lldb_private::plugin::dwarf::DWARFDIE &parent_die, + ParseRustVariantPart(DWARFDIE &die, const DWARFDIE &parent_die, lldb_private::CompilerType &class_clang_type, const lldb::AccessType default_accesibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info); @@ -420,8 +398,9 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { /// Some attributes are relevant for all kinds of types (declaration), while /// others are only meaningful to a specific type (is_virtual) struct ParsedDWARFTypeAttributes { - explicit ParsedDWARFTypeAttributes( - const lldb_private::plugin::dwarf::DWARFDIE &die); + typedef lldb_private::plugin::dwarf::DWARFDIE DWARFDIE; + + explicit ParsedDWARFTypeAttributes(const DWARFDIE &die); lldb::AccessType accessibility = lldb::eAccessNone; bool is_artificial = false; @@ -438,7 +417,7 @@ struct ParsedDWARFTypeAttributes { const char *mangled_name = nullptr; lldb_private::ConstString name; lldb_private::Declaration decl; - lldb_private::plugin::dwarf::DWARFDIE object_pointer; + DWARFDIE object_pointer; lldb_private::plugin::dwarf::DWARFFormValue abstract_origin; lldb_private::plugin::dwarf::DWARFFormValue 
containing_type; lldb_private::plugin::dwarf::DWARFFormValue signature; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp index 4884374ef9472..03e289bbf3300 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp @@ -13,6 +13,7 @@ #include "DWARFDebugInfoEntry.h" #include "DWARFDeclContext.h" #include "DWARFUnit.h" +#include "lldb/Symbol/Type.h" #include "llvm/ADT/iterator.h" @@ -379,108 +380,118 @@ std::vector DWARFDIE::GetDeclContextDIEs() const { return result; } -static std::vector -GetDeclContextImpl(llvm::SmallSet &seen, DWARFDIE die) { - std::vector context; +static void GetDeclContextImpl(DWARFDIE die, + llvm::SmallSet &seen, + std::vector &context) { // Stop if we hit a cycle. - if (!die || !seen.insert(die.GetID()).second) - return context; - - // Handle outline member function DIEs by following the specification. - if (DWARFDIE spec = die.GetReferencedDIE(DW_AT_specification)) - return GetDeclContextImpl(seen, spec); - - // Get the parent context chain. - context = GetDeclContextImpl(seen, die.GetParent()); + while (die && seen.insert(die.GetID()).second) { + // Handle outline member function DIEs by following the specification. + if (DWARFDIE spec = die.GetReferencedDIE(DW_AT_specification)) { + die = spec; + continue; + } - // Add this DIE's contribution at the end of the chain. 
- auto push_ctx = [&](CompilerContextKind kind, llvm::StringRef name) { - context.push_back({kind, ConstString(name)}); - }; - switch (die.Tag()) { - case DW_TAG_module: - push_ctx(CompilerContextKind::Module, die.GetName()); - break; - case DW_TAG_namespace: - push_ctx(CompilerContextKind::Namespace, die.GetName()); - break; - case DW_TAG_structure_type: - push_ctx(CompilerContextKind::Struct, die.GetName()); - break; - case DW_TAG_union_type: - push_ctx(CompilerContextKind::Union, die.GetName()); - break; - case DW_TAG_class_type: - push_ctx(CompilerContextKind::Class, die.GetName()); - break; - case DW_TAG_enumeration_type: - push_ctx(CompilerContextKind::Enum, die.GetName()); - break; - case DW_TAG_subprogram: - push_ctx(CompilerContextKind::Function, die.GetName()); - break; - case DW_TAG_variable: - push_ctx(CompilerContextKind::Variable, die.GetPubname()); - break; - case DW_TAG_typedef: - push_ctx(CompilerContextKind::Typedef, die.GetName()); - break; - default: - break; + // Add this DIE's contribution at the end of the chain. 
+ auto push_ctx = [&](CompilerContextKind kind, llvm::StringRef name) { + context.push_back({kind, ConstString(name)}); + }; + switch (die.Tag()) { + case DW_TAG_module: + push_ctx(CompilerContextKind::Module, die.GetName()); + break; + case DW_TAG_namespace: + push_ctx(CompilerContextKind::Namespace, die.GetName()); + break; + case DW_TAG_structure_type: + push_ctx(CompilerContextKind::Struct, die.GetName()); + break; + case DW_TAG_union_type: + push_ctx(CompilerContextKind::Union, die.GetName()); + break; + case DW_TAG_class_type: + push_ctx(CompilerContextKind::Class, die.GetName()); + break; + case DW_TAG_enumeration_type: + push_ctx(CompilerContextKind::Enum, die.GetName()); + break; + case DW_TAG_subprogram: + push_ctx(CompilerContextKind::Function, die.GetName()); + break; + case DW_TAG_variable: + push_ctx(CompilerContextKind::Variable, die.GetPubname()); + break; + case DW_TAG_typedef: + push_ctx(CompilerContextKind::Typedef, die.GetName()); + break; + default: + break; + } + // Now process the parent. + die = die.GetParent(); } - return context; } -std::vector DWARFDIE::GetDeclContext() const { +std::vector DWARFDIE::GetDeclContext() const { llvm::SmallSet seen; - return GetDeclContextImpl(seen, *this); + std::vector context; + GetDeclContextImpl(*this, seen, context); + std::reverse(context.begin(), context.end()); + return context; } -std::vector -DWARFDIE::GetTypeLookupContext() const { - std::vector context; - // If there is no name, then there is no need to look anything up for this - // DIE. 
- const char *name = GetName(); - if (!name || !name[0]) - return context; - const dw_tag_t tag = Tag(); - if (tag == DW_TAG_compile_unit || tag == DW_TAG_partial_unit) - return context; - DWARFDIE parent = GetParent(); - if (parent) - context = parent.GetTypeLookupContext(); - auto push_ctx = [&](CompilerContextKind kind, llvm::StringRef name) { - context.push_back({kind, ConstString(name)}); - }; - switch (tag) { - case DW_TAG_namespace: - push_ctx(CompilerContextKind::Namespace, name); - break; - case DW_TAG_structure_type: - push_ctx(CompilerContextKind::Struct, name); - break; - case DW_TAG_union_type: - push_ctx(CompilerContextKind::Union, name); - break; - case DW_TAG_class_type: - push_ctx(CompilerContextKind::Class, name); - break; - case DW_TAG_enumeration_type: - push_ctx(CompilerContextKind::Enum, name); - break; - case DW_TAG_variable: - push_ctx(CompilerContextKind::Variable, GetPubname()); - break; - case DW_TAG_typedef: - push_ctx(CompilerContextKind::Typedef, name); - break; - case DW_TAG_base_type: - push_ctx(CompilerContextKind::Builtin, name); - break; - default: - break; +static void GetTypeLookupContextImpl(DWARFDIE die, + llvm::SmallSet &seen, + std::vector &context) { + // Stop if we hit a cycle. + while (die && seen.insert(die.GetID()).second) { + // If there is no name, then there is no need to look anything up for this + // DIE. + const char *name = die.GetName(); + if (!name || !name[0]) + return; + + // Add this DIE's contribution at the end of the chain. 
+ auto push_ctx = [&](CompilerContextKind kind, llvm::StringRef name) { + context.push_back({kind, ConstString(name)}); + }; + switch (die.Tag()) { + case DW_TAG_namespace: + push_ctx(CompilerContextKind::Namespace, die.GetName()); + break; + case DW_TAG_structure_type: + push_ctx(CompilerContextKind::Struct, die.GetName()); + break; + case DW_TAG_union_type: + push_ctx(CompilerContextKind::Union, die.GetName()); + break; + case DW_TAG_class_type: + push_ctx(CompilerContextKind::Class, die.GetName()); + break; + case DW_TAG_enumeration_type: + push_ctx(CompilerContextKind::Enum, die.GetName()); + break; + case DW_TAG_variable: + push_ctx(CompilerContextKind::Variable, die.GetPubname()); + break; + case DW_TAG_typedef: + push_ctx(CompilerContextKind::Typedef, die.GetName()); + break; + case DW_TAG_base_type: + push_ctx(CompilerContextKind::Builtin, name); + break; + default: + break; + } + // Now process the parent. + die = die.GetParent(); } +} + +std::vector DWARFDIE::GetTypeLookupContext() const { + llvm::SmallSet seen; + std::vector context; + GetTypeLookupContextImpl(*this, seen, context); + std::reverse(context.begin(), context.end()); return context; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp index 20c07a94b5076..30fb5d5ebdb0d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp @@ -24,16 +24,11 @@ using namespace lldb_private::plugin::dwarf; DWARFIndex::~DWARFIndex() = default; bool DWARFIndex::ProcessFunctionDIE( - const Module::LookupInfo &lookup_info, DIERef ref, SymbolFileDWARF &dwarf, + const Module::LookupInfo &lookup_info, DWARFDIE die, const CompilerDeclContext &parent_decl_ctx, llvm::function_ref callback) { llvm::StringRef name = lookup_info.GetLookupName().GetStringRef(); FunctionNameType name_type_mask = lookup_info.GetNameTypeMask(); - DWARFDIE die = dwarf.GetDIE(ref); - if (!die) { - 
ReportInvalidDIERef(ref, name); - return true; - } if (!(name_type_mask & eFunctionNameTypeFull)) { ConstString name_to_match_against; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h index 0551b07100a96..cb3ae8a06d788 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h @@ -81,11 +81,10 @@ class DWARFIndex { StatsDuration m_index_time; /// Helper function implementing common logic for processing function dies. If - /// the function given by "ref" matches search criteria given by - /// "parent_decl_ctx" and "name_type_mask", it is inserted into the "dies" - /// vector. - bool ProcessFunctionDIE(const Module::LookupInfo &lookup_info, DIERef ref, - SymbolFileDWARF &dwarf, + /// the function given by "die" matches search criteria given by + /// "parent_decl_ctx" and "name_type_mask", it calls the callback with the + /// given die. + bool ProcessFunctionDIE(const Module::LookupInfo &lookup_info, DWARFDIE die, const CompilerDeclContext &parent_decl_ctx, llvm::function_ref callback); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp index 79400e36e04f3..56717bab1ecd8 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp @@ -64,29 +64,31 @@ DebugNamesDWARFIndex::GetNonSkeletonUnit(const DebugNames::Entry &entry) const { return cu ? 
&cu->GetNonSkeletonUnit() : nullptr; } -std::optional -DebugNamesDWARFIndex::ToDIERef(const DebugNames::Entry &entry) const { +DWARFDIE DebugNamesDWARFIndex::GetDIE(const DebugNames::Entry &entry) const { DWARFUnit *unit = GetNonSkeletonUnit(entry); - if (!unit) - return std::nullopt; - if (std::optional die_offset = entry.getDIEUnitOffset()) - return DIERef(unit->GetSymbolFileDWARF().GetFileIndex(), - DIERef::Section::DebugInfo, unit->GetOffset() + *die_offset); - - return std::nullopt; + std::optional die_offset = entry.getDIEUnitOffset(); + if (!unit || !die_offset) + return DWARFDIE(); + if (DWARFDIE die = unit->GetDIE(unit->GetOffset() + *die_offset)) + return die; + + m_module.ReportErrorIfModifyDetected( + "the DWARF debug information has been modified (bad offset {0:x} in " + "debug_names section)\n", + *die_offset); + return DWARFDIE(); } bool DebugNamesDWARFIndex::ProcessEntry( const DebugNames::Entry &entry, llvm::function_ref callback) { - std::optional ref = ToDIERef(entry); - if (!ref) - return true; - SymbolFileDWARF &dwarf = *llvm::cast( - m_module.GetSymbolFile()->GetBackingSymbolFile()); - DWARFDIE die = dwarf.GetDIE(*ref); + DWARFDIE die = GetDIE(entry); if (!die) return true; + // Clang erroneously emits index entries for declaration DIEs in case when the + // definition is in a type unit (llvm.org/pr77696). Weed those out. + if (die.GetAttributeValueAsUnsigned(DW_AT_declaration, 0)) + return true; return callback(die); } @@ -183,7 +185,7 @@ void DebugNamesDWARFIndex::GetCompleteObjCClass( llvm::function_ref callback) { // Keep a list of incomplete types as fallback for when we don't find the // complete type. 
- DIEArray incomplete_types; + std::vector incomplete_types; for (const DebugNames::Entry &entry : m_debug_names_up->equal_range(class_name.GetStringRef())) { @@ -191,19 +193,14 @@ void DebugNamesDWARFIndex::GetCompleteObjCClass( entry.tag() != DW_TAG_class_type) continue; - std::optional ref = ToDIERef(entry); - if (!ref) - continue; - - DWARFUnit *cu = m_debug_info.GetUnit(*ref); - if (!cu || !cu->Supports_DW_AT_APPLE_objc_complete_type()) { - incomplete_types.push_back(*ref); + DWARFDIE die = GetDIE(entry); + if (!die) { + // Report invalid continue; } - - DWARFDIE die = m_debug_info.GetDIE(*ref); - if (!die) { - ReportInvalidDIERef(*ref, class_name.GetStringRef()); + DWARFUnit *cu = die.GetCU(); + if (!cu->Supports_DW_AT_APPLE_objc_complete_type()) { + incomplete_types.push_back(die); continue; } @@ -212,12 +209,11 @@ void DebugNamesDWARFIndex::GetCompleteObjCClass( callback(die); return; } - incomplete_types.push_back(*ref); + incomplete_types.push_back(die); } - auto dierefcallback = DIERefCallback(callback, class_name.GetStringRef()); - for (DIERef ref : incomplete_types) - if (!dierefcallback(ref)) + for (DWARFDIE die : incomplete_types) + if (!callback(die)) return; m_fallback.GetCompleteObjCClass(class_name, must_be_implementation, callback); @@ -379,8 +375,8 @@ void DebugNamesDWARFIndex::GetFunctions( if (tag != DW_TAG_subprogram && tag != DW_TAG_inlined_subroutine) continue; - if (std::optional ref = ToDIERef(entry)) { - if (!ProcessFunctionDIE(lookup_info, *ref, dwarf, parent_decl_ctx, + if (DWARFDIE die = GetDIE(entry)) { + if (!ProcessFunctionDIE(lookup_info, die, parent_decl_ctx, [&](DWARFDIE die) { if (!seen.insert(die.GetDIE()).second) return true; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h index 81fb8f88b805a..a27a414ecdd19 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h +++ 
b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h @@ -84,7 +84,7 @@ class DebugNamesDWARFIndex : public DWARFIndex { ManualDWARFIndex m_fallback; DWARFUnit *GetNonSkeletonUnit(const DebugNames::Entry &entry) const; - std::optional ToDIERef(const DebugNames::Entry &entry) const; + DWARFDIE GetDIE(const DebugNames::Entry &entry) const; bool ProcessEntry(const DebugNames::Entry &entry, llvm::function_ref callback); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index f6f152726bf74..bc489e5b8ad46 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -481,6 +481,13 @@ static ConstString GetDWARFMachOSegmentName() { return g_dwarf_section_name; } +llvm::DenseMap & +SymbolFileDWARF::GetForwardDeclCompilerTypeToDIE() { + if (SymbolFileDWARFDebugMap *debug_map_symfile = GetDebugMapSymfile()) + return debug_map_symfile->GetForwardDeclCompilerTypeToDIE(); + return m_forward_decl_compiler_type_to_die; +} + UniqueDWARFASTTypeMap &SymbolFileDWARF::GetUniqueDWARFASTTypeMap() { SymbolFileDWARFDebugMap *debug_map_symfile = GetDebugMapSymfile(); if (debug_map_symfile) @@ -1632,27 +1639,33 @@ bool SymbolFileDWARF::CompleteType(CompilerType &compiler_type) { return true; } - DWARFDIE dwarf_die = GetDIE(die_it->getSecond()); - if (dwarf_die) { - // Once we start resolving this type, remove it from the forward - // declaration map in case anyone child members or other types require this - // type to get resolved. The type will get resolved when all of the calls - // to SymbolFileDWARF::ResolveClangOpaqueTypeDefinition are done. - GetForwardDeclCompilerTypeToDIE().erase(die_it); - - Type *type = GetDIEToType().lookup(dwarf_die.GetDIE()); + // Once we start resolving this type, remove it from the forward + // declaration map in case anyone's child members or other types require this + // type to get resolved. 
+ DWARFDIE dwarf_die = GetDIE(die_it->second); + GetForwardDeclCompilerTypeToDIE().erase(die_it); + Type *type = nullptr; + if (DWARFASTParser *dwarf_ast = GetDWARFParser(*dwarf_die.GetCU())) + type = dwarf_ast->FindDefinitionTypeForDIE(dwarf_die); + if (!type) + return false; - Log *log = GetLog(DWARFLog::DebugInfo | DWARFLog::TypeCompletion); - if (log) - GetObjectFile()->GetModule()->LogMessageVerboseBacktrace( - log, "{0:x8}: {1} ({2}) '{3}' resolving forward declaration...", - dwarf_die.GetID(), DW_TAG_value_to_name(dwarf_die.Tag()), - dwarf_die.Tag(), type->GetName().AsCString()); - assert(compiler_type); - if (DWARFASTParser *dwarf_ast = GetDWARFParser(*dwarf_die.GetCU())) - return dwarf_ast->CompleteTypeFromDWARF(dwarf_die, type, compiler_type); + die_it = GetForwardDeclCompilerTypeToDIE().find( + compiler_type_no_qualifiers.GetOpaqueQualType()); + if (die_it != GetForwardDeclCompilerTypeToDIE().end()) { + dwarf_die = GetDIE(die_it->getSecond()); + GetForwardDeclCompilerTypeToDIE().erase(die_it); } - return false; + + if (Log *log = GetLog(DWARFLog::DebugInfo | DWARFLog::TypeCompletion)) + GetObjectFile()->GetModule()->LogMessageVerboseBacktrace( + log, "{0:x8}: {1} ({2}) '{3}' resolving forward declaration...", + dwarf_die.GetID(), DW_TAG_value_to_name(dwarf_die.Tag()), + dwarf_die.Tag(), type->GetName().AsCString()); + assert(compiler_type); + if (DWARFASTParser *dwarf_ast = GetDWARFParser(*dwarf_die.GetCU())) + return dwarf_ast->CompleteTypeFromDWARF(dwarf_die, type, compiler_type); + return true; } Type *SymbolFileDWARF::ResolveType(const DWARFDIE &die, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index 7282c08c6857c..35893f2072dd6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -335,12 +335,8 @@ class SymbolFileDWARF : public SymbolFileCommon { virtual DIEToTypePtr &GetDIEToType() { return 
m_die_to_type; } - typedef llvm::DenseMap - CompilerTypeToDIE; - - virtual CompilerTypeToDIE &GetForwardDeclCompilerTypeToDIE() { - return m_forward_decl_compiler_type_to_die; - } + virtual llvm::DenseMap & + GetForwardDeclCompilerTypeToDIE(); typedef llvm::DenseMap DIEToVariableSP; @@ -533,9 +529,14 @@ class SymbolFileDWARF : public SymbolFileCommon { NameToOffsetMap m_function_scope_qualified_name_map; std::unique_ptr m_ranges; UniqueDWARFASTTypeMap m_unique_ast_type_map; + // A map from DIE to lldb_private::Type. For record type, the key might be + // either declaration DIE or definition DIE. DIEToTypePtr m_die_to_type; DIEToVariableSP m_die_to_variable_sp; - CompilerTypeToDIE m_forward_decl_compiler_type_to_die; + // A map from CompilerType to the struct/class/union/enum DIE (might be a + // declaration or a definition) that is used to construct it. + llvm::DenseMap + m_forward_decl_compiler_type_to_die; llvm::DenseMap> m_type_unit_support_files; std::vector m_lldb_cu_to_dwarf_unit; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h index de22dd676eef0..d7d571919bc7d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h @@ -284,6 +284,11 @@ class SymbolFileDWARFDebugMap : public SymbolFileCommon { lldb::TypeSP FindCompleteObjCDefinitionTypeForDIE( const DWARFDIE &die, ConstString type_name, bool must_be_implementation); + llvm::DenseMap & + GetForwardDeclCompilerTypeToDIE() { + return m_forward_decl_compiler_type_to_die; + } + UniqueDWARFASTTypeMap &GetUniqueDWARFASTTypeMap() { return m_unique_ast_type_map; } @@ -321,6 +326,10 @@ class SymbolFileDWARFDebugMap : public SymbolFileCommon { std::vector m_func_indexes; // Sorted by address std::vector m_glob_indexes; std::map>, OSOInfoSP> m_oso_map; + // A map from CompilerType to the struct/class/union/enum DIE (might be a + // 
declaration or a definition) that is used to construct it. + llvm::DenseMap + m_forward_decl_compiler_type_to_die; UniqueDWARFASTTypeMap m_unique_ast_type_map; LazyBool m_supports_DW_AT_APPLE_objc_complete_type; DebugMap m_debug_map; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp index 85e1afd0d8976..8fd369c65f86b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp @@ -110,7 +110,7 @@ SymbolFileDWARF::DIEToVariableSP &SymbolFileDWARFDwo::GetDIEToVariable() { return GetBaseSymbolFile().GetDIEToVariable(); } -SymbolFileDWARF::CompilerTypeToDIE & +llvm::DenseMap & SymbolFileDWARFDwo::GetForwardDeclCompilerTypeToDIE() { return GetBaseSymbolFile().GetForwardDeclCompilerTypeToDIE(); } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h index 1500540424b52..2f0ac415e90d4 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h @@ -72,7 +72,8 @@ class SymbolFileDWARFDwo : public SymbolFileDWARF { DIEToVariableSP &GetDIEToVariable() override; - CompilerTypeToDIE &GetForwardDeclCompilerTypeToDIE() override; + llvm::DenseMap & + GetForwardDeclCompilerTypeToDIE() override; UniqueDWARFASTTypeMap &GetUniqueDWARFASTTypeMap() override; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp index 223518f0ae824..4762356034cab 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp @@ -13,66 +13,67 @@ using namespace lldb_private::dwarf; using namespace lldb_private::plugin::dwarf; -bool UniqueDWARFASTTypeList::Find(const DWARFDIE &die, - const lldb_private::Declaration &decl, - const int32_t 
byte_size, - UniqueDWARFASTType &entry) const { - for (const UniqueDWARFASTType &udt : m_collection) { +UniqueDWARFASTType *UniqueDWARFASTTypeList::Find( + const DWARFDIE &die, const lldb_private::Declaration &decl, + const int32_t byte_size, bool is_forward_declaration) { + for (UniqueDWARFASTType &udt : m_collection) { // Make sure the tags match if (udt.m_die.Tag() == die.Tag()) { - // Validate byte sizes of both types only if both are valid. - if (udt.m_byte_size < 0 || byte_size < 0 || - udt.m_byte_size == byte_size) { - // Make sure the file and line match - if (udt.m_declaration == decl) { - // The type has the same name, and was defined on the same file and - // line. Now verify all of the parent DIEs match. - DWARFDIE parent_arg_die = die.GetParent(); - DWARFDIE parent_pos_die = udt.m_die.GetParent(); - bool match = true; - bool done = false; - while (!done && match && parent_arg_die && parent_pos_die) { - const dw_tag_t parent_arg_tag = parent_arg_die.Tag(); - const dw_tag_t parent_pos_tag = parent_pos_die.Tag(); - if (parent_arg_tag == parent_pos_tag) { - switch (parent_arg_tag) { - case DW_TAG_class_type: - case DW_TAG_structure_type: - case DW_TAG_union_type: - case DW_TAG_namespace: { - const char *parent_arg_die_name = parent_arg_die.GetName(); - if (parent_arg_die_name == - nullptr) // Anonymous (i.e. no-name) struct - { - match = false; - } else { - const char *parent_pos_die_name = parent_pos_die.GetName(); - if (parent_pos_die_name == nullptr || - ((parent_arg_die_name != parent_pos_die_name) && - strcmp(parent_arg_die_name, parent_pos_die_name))) - match = false; - } - } break; - - case DW_TAG_compile_unit: - case DW_TAG_partial_unit: - done = true; - break; - default: - break; - } + // If they are not both definition DIEs or both declaration DIEs, then + // don't check for byte size and declaration location, because declaration + // DIEs usually don't have those info. 
+ bool matching_size_declaration = + udt.m_is_forward_declaration != is_forward_declaration + ? true + : (udt.m_byte_size < 0 || byte_size < 0 || + udt.m_byte_size == byte_size) && + udt.m_declaration == decl; + if (!matching_size_declaration) + continue; + // The type has the same name, and was defined on the same file and + // line. Now verify all of the parent DIEs match. + DWARFDIE parent_arg_die = die.GetParent(); + DWARFDIE parent_pos_die = udt.m_die.GetParent(); + bool match = true; + bool done = false; + while (!done && match && parent_arg_die && parent_pos_die) { + const dw_tag_t parent_arg_tag = parent_arg_die.Tag(); + const dw_tag_t parent_pos_tag = parent_pos_die.Tag(); + if (parent_arg_tag == parent_pos_tag) { + switch (parent_arg_tag) { + case DW_TAG_class_type: + case DW_TAG_structure_type: + case DW_TAG_union_type: + case DW_TAG_namespace: { + const char *parent_arg_die_name = parent_arg_die.GetName(); + if (parent_arg_die_name == nullptr) { + // Anonymous (i.e. no-name) struct + match = false; + } else { + const char *parent_pos_die_name = parent_pos_die.GetName(); + if (parent_pos_die_name == nullptr || + ((parent_arg_die_name != parent_pos_die_name) && + strcmp(parent_arg_die_name, parent_pos_die_name))) + match = false; } - parent_arg_die = parent_arg_die.GetParent(); - parent_pos_die = parent_pos_die.GetParent(); - } + } break; - if (match) { - entry = udt; - return true; + case DW_TAG_compile_unit: + case DW_TAG_partial_unit: + done = true; + break; + default: + break; } } + parent_arg_die = parent_arg_die.GetParent(); + parent_pos_die = parent_pos_die.GetParent(); + } + + if (match) { + return &udt; } } } - return false; + return nullptr; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h index bf3cbae55e5c7..29e5c02dcbe17 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h @@ 
-23,31 +23,19 @@ class UniqueDWARFASTType { // Constructors and Destructors UniqueDWARFASTType() : m_type_sp(), m_die(), m_declaration() {} - UniqueDWARFASTType(lldb::TypeSP &type_sp, const DWARFDIE &die, - const Declaration &decl, int32_t byte_size) - : m_type_sp(type_sp), m_die(die), m_declaration(decl), - m_byte_size(byte_size) {} - UniqueDWARFASTType(const UniqueDWARFASTType &rhs) : m_type_sp(rhs.m_type_sp), m_die(rhs.m_die), - m_declaration(rhs.m_declaration), m_byte_size(rhs.m_byte_size) {} + m_declaration(rhs.m_declaration), m_byte_size(rhs.m_byte_size), + m_is_forward_declaration(rhs.m_is_forward_declaration) {} ~UniqueDWARFASTType() = default; - UniqueDWARFASTType &operator=(const UniqueDWARFASTType &rhs) { - if (this != &rhs) { - m_type_sp = rhs.m_type_sp; - m_die = rhs.m_die; - m_declaration = rhs.m_declaration; - m_byte_size = rhs.m_byte_size; - } - return *this; - } - lldb::TypeSP m_type_sp; DWARFDIE m_die; Declaration m_declaration; int32_t m_byte_size = -1; + // True if the m_die is a forward declaration DIE. 
+ bool m_is_forward_declaration = true; }; class UniqueDWARFASTTypeList { @@ -62,8 +50,9 @@ class UniqueDWARFASTTypeList { m_collection.push_back(entry); } - bool Find(const DWARFDIE &die, const Declaration &decl, - const int32_t byte_size, UniqueDWARFASTType &entry) const; + UniqueDWARFASTType *Find(const DWARFDIE &die, const Declaration &decl, + const int32_t byte_size, + bool is_forward_declaration); protected: typedef std::vector collection; @@ -80,14 +69,15 @@ class UniqueDWARFASTTypeMap { m_collection[name.GetCString()].Append(entry); } - bool Find(ConstString name, const DWARFDIE &die, const Declaration &decl, - const int32_t byte_size, UniqueDWARFASTType &entry) const { + UniqueDWARFASTType *Find(ConstString name, const DWARFDIE &die, + const Declaration &decl, const int32_t byte_size, + bool is_forward_declaration) { const char *unique_name_cstr = name.GetCString(); - collection::const_iterator pos = m_collection.find(unique_name_cstr); + collection::iterator pos = m_collection.find(unique_name_cstr); if (pos != m_collection.end()) { - return pos->second.Find(die, decl, byte_size, entry); + return pos->second.Find(die, decl, byte_size, is_forward_declaration); } - return false; + return nullptr; } protected: diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp index fab3ca989c0ec..17c5f6118603f 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp @@ -47,15 +47,18 @@ UdtRecordCompleter::UdtRecordCompleter( CVType cvt = m_index.tpi().getType(m_id.index); switch (cvt.kind()) { case LF_ENUM: + m_cvr.er.Options = ClassOptions::None; llvm::cantFail(TypeDeserializer::deserializeAs(cvt, m_cvr.er)); break; case LF_UNION: + m_cvr.ur.Options = ClassOptions::None; llvm::cantFail(TypeDeserializer::deserializeAs(cvt, m_cvr.ur)); m_layout.bit_size = m_cvr.ur.getSize() * 8; 
m_record.record.kind = Member::Union; break; case LF_CLASS: case LF_STRUCTURE: + m_cvr.cr.Options = ClassOptions::None; llvm::cantFail(TypeDeserializer::deserializeAs(cvt, m_cvr.cr)); m_layout.bit_size = m_cvr.cr.getSize() * 8; m_record.record.kind = Member::Struct; diff --git a/lldb/source/Symbol/Block.cpp b/lldb/source/Symbol/Block.cpp index 6eeabe0ff5e4d..f7d9c0d2d3306 100644 --- a/lldb/source/Symbol/Block.cpp +++ b/lldb/source/Symbol/Block.cpp @@ -314,6 +314,22 @@ bool Block::GetRangeAtIndex(uint32_t range_idx, AddressRange &range) { return false; } +AddressRanges Block::GetRanges() { + AddressRanges ranges; + Function *function = CalculateSymbolContextFunction(); + if (!function) + return ranges; + for (size_t i = 0, e = m_ranges.GetSize(); i < e; ++i) { + ranges.emplace_back(); + auto &range = ranges.back(); + const Range &vm_range = m_ranges.GetEntryRef(i); + range.GetBaseAddress() = function->GetAddressRange().GetBaseAddress(); + range.GetBaseAddress().Slide(vm_range.GetRangeBase()); + range.SetByteSize(vm_range.GetByteSize()); + } + return ranges; +} + bool Block::GetStartAddress(Address &addr) { if (m_ranges.IsEmpty()) return false; diff --git a/lldb/source/Symbol/Type.cpp b/lldb/source/Symbol/Type.cpp index 6bf69c2ded287..585808ace15ce 100644 --- a/lldb/source/Symbol/Type.cpp +++ b/lldb/source/Symbol/Type.cpp @@ -36,6 +36,13 @@ using namespace lldb; using namespace lldb_private; +llvm::raw_ostream &lldb_private::operator<<(llvm::raw_ostream &os, + const CompilerContext &rhs) { + StreamString lldb_stream; + rhs.Dump(lldb_stream); + return os << lldb_stream.GetString(); +} + bool lldb_private::contextMatches(llvm::ArrayRef context_chain, llvm::ArrayRef pattern) { auto ctx = context_chain.begin(); diff --git a/lldb/source/Target/RegisterFlags.cpp b/lldb/source/Target/RegisterFlags.cpp index b1669b85fd2fe..5274960587bf3 100644 --- a/lldb/source/Target/RegisterFlags.cpp +++ b/lldb/source/Target/RegisterFlags.cpp @@ -190,7 +190,7 @@ std::string 
RegisterFlags::AsTable(uint32_t max_width) const { return table; } -void RegisterFlags::ToXML(StreamString &strm) const { +void RegisterFlags::ToXML(Stream &strm) const { // Example XML: // // @@ -213,7 +213,7 @@ void RegisterFlags::ToXML(StreamString &strm) const { strm.Indent("\n"); } -void RegisterFlags::Field::ToXML(StreamString &strm) const { +void RegisterFlags::Field::ToXML(Stream &strm) const { // Example XML: // strm.Indent(); diff --git a/lldb/test/API/commands/expression/fixits/TestFixIts.py b/lldb/test/API/commands/expression/fixits/TestFixIts.py index bc53b72fe611b..1b22ed1c0077c 100644 --- a/lldb/test/API/commands/expression/fixits/TestFixIts.py +++ b/lldb/test/API/commands/expression/fixits/TestFixIts.py @@ -106,9 +106,8 @@ def test_with_target_error_applies_fixit(self): ) self.assertIn("null_pointer->first", ret_val.GetError()) - # The final function call runs into SIGILL on aarch64-linux. @expectedFailureAll( - archs=["aarch64"], oslist=["freebsd", "linux"], bugnumber="llvm.org/pr49407" + archs=["aarch64"], oslist=["freebsd"], bugnumber="llvm.org/pr49407" ) def test_with_multiple_retries(self): """Test calling expressions with errors that can be fixed by the FixIts.""" diff --git a/lldb/test/API/commands/expression/static-initializers/TestStaticInitializers.py b/lldb/test/API/commands/expression/static-initializers/TestStaticInitializers.py index 5fc37ac6a5818..ea3aa6a4608c4 100644 --- a/lldb/test/API/commands/expression/static-initializers/TestStaticInitializers.py +++ b/lldb/test/API/commands/expression/static-initializers/TestStaticInitializers.py @@ -7,8 +7,8 @@ class StaticInitializers(TestBase): @expectedFailureAll( archs="aarch64", - oslist=["freebsd", "linux"], - bugnumber="https://bugs.llvm.org/show_bug.cgi?id=44053", + oslist=["freebsd"], + bugnumber="llvm.org/pr44053", ) def test(self): """Test a static initializer.""" diff --git a/lldb/test/API/python_api/address_range/Makefile b/lldb/test/API/python_api/address_range/Makefile new file 
mode 100644 index 0000000000000..99998b20bcb05 --- /dev/null +++ b/lldb/test/API/python_api/address_range/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/python_api/address_range/TestAddressRange.py b/lldb/test/API/python_api/address_range/TestAddressRange.py new file mode 100644 index 0000000000000..65221e3f1b0e9 --- /dev/null +++ b/lldb/test/API/python_api/address_range/TestAddressRange.py @@ -0,0 +1,254 @@ +""" +Test SBAddressRange APIs. +""" + +import lldb +from lldbsuite.test.lldbtest import * + + +class AddressRangeTestCase(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def setUp(self): + TestBase.setUp(self) + + self.build() + exe = self.getBuildArtifact("a.out") + + self.target = self.dbg.CreateTarget(exe) + self.assertTrue(self.target, VALID_TARGET) + self.launch_info = self.target.GetLaunchInfo() + self.launch_info.SetWorkingDirectory(self.get_process_working_directory()) + + self.bp1 = self.target.BreakpointCreateByName("main", "a.out") + self.bp2 = self.target.BreakpointCreateByName("foo", "a.out") + self.bp3 = self.target.BreakpointCreateByName("bar", "a.out") + + self.assertTrue(self.bp1.IsValid()) + self.assertTrue(self.bp2.IsValid()) + self.assertTrue(self.bp3.IsValid()) + + self.addr1 = self.bp1.GetLocationAtIndex(0).GetAddress() + self.addr2 = self.bp2.GetLocationAtIndex(0).GetAddress() + self.addr3 = self.bp3.GetLocationAtIndex(0).GetAddress() + + self.assertTrue(self.addr1.IsValid()) + self.assertTrue(self.addr2.IsValid()) + self.assertTrue(self.addr3.IsValid()) + + def test_address_range_default(self): + """Testing default constructor.""" + empty_range = lldb.SBAddressRange() + self.assertEqual(empty_range.IsValid(), False) + + def test_address_range_construction(self): + """Make sure the construction and getters work.""" + range = lldb.SBAddressRange(self.addr1, 8) + self.assertEqual(range.IsValid(), True) + self.assertEqual(range.GetBaseAddress(), self.addr1) + 
self.assertEqual(range.GetByteSize(), 8) + + def test_address_range_clear(self): + """Make sure the clear method works.""" + range = lldb.SBAddressRange(self.addr1, 8) + self.assertEqual(range.IsValid(), True) + self.assertEqual(range.GetBaseAddress(), self.addr1) + self.assertEqual(range.GetByteSize(), 8) + + range.Clear() + self.assertEqual(range.IsValid(), False) + + def test_function(self): + """Make sure the range works in SBFunction APIs.""" + + # Setup breakpoints in main + loc = self.bp1.GetLocationAtIndex(0) + loc_addr = loc.GetAddress() + func = loc_addr.GetFunction() + ranges = func.GetRanges() + self.assertEqual(ranges.GetSize(), 1) + + range = ranges.GetAddressRangeAtIndex(0) + self.assertEqual( + range.GetByteSize(), + func.GetEndAddress().GetOffset() - func.GetStartAddress().GetOffset(), + ) + self.assertEqual( + range.GetBaseAddress().GetOffset(), + func.GetStartAddress().GetOffset(), + ) + + def test_block(self): + """Make sure the range works in SBBlock APIs.""" + loc = self.bp1.GetLocationAtIndex(0) + loc_addr = loc.GetAddress() + block = loc_addr.GetBlock() + + ranges = block.GetRanges() + self.assertEqual(ranges.GetSize(), 1) + + range = ranges.GetAddressRangeAtIndex(0) + self.assertEqual( + range.GetByteSize(), + block.GetRangeEndAddress(0).GetOffset() + - block.GetRangeStartAddress(0).GetOffset(), + ) + self.assertEqual( + range.GetBaseAddress().GetOffset(), + block.GetRangeStartAddress(0).GetOffset(), + ) + + def test_address_range_list(self): + """Make sure the SBAddressRangeList works by adding and getting ranges.""" + range1 = lldb.SBAddressRange(self.addr1, 8) + range2 = lldb.SBAddressRange(self.addr2, 16) + range3 = lldb.SBAddressRange(self.addr3, 32) + + range_list = lldb.SBAddressRangeList() + self.assertEqual(range_list.GetSize(), 0) + + range_list.Append(range1) + range_list.Append(range2) + range_list.Append(range3) + self.assertEqual(range_list.GetSize(), 3) + self.assertRaises(IndexError, lambda: range_list[3]) + + range1_copy = 
range_list.GetAddressRangeAtIndex(0) + self.assertEqual(range1.GetByteSize(), range1_copy.GetByteSize()) + self.assertEqual( + range1.GetBaseAddress().GetOffset(), + range1_copy.GetBaseAddress().GetOffset(), + ) + + range2_copy = range_list.GetAddressRangeAtIndex(1) + self.assertEqual(range2.GetByteSize(), range2_copy.GetByteSize()) + self.assertEqual( + range2.GetBaseAddress().GetOffset(), + range2_copy.GetBaseAddress().GetOffset(), + ) + + range3_copy = range_list.GetAddressRangeAtIndex(2) + self.assertEqual(range3.GetByteSize(), range3_copy.GetByteSize()) + self.assertEqual( + range3.GetBaseAddress().GetOffset(), + range3_copy.GetBaseAddress().GetOffset(), + ) + + range_list.Clear() + self.assertEqual(range_list.GetSize(), 0) + + def test_address_range_list_len(self): + """Make sure the len() operator works.""" + range = lldb.SBAddressRange(self.addr1, 8) + + range_list = lldb.SBAddressRangeList() + self.assertEqual(len(range_list), 0) + + range_list.Append(range) + self.assertEqual(len(range_list), 1) + + def test_address_range_list_iterator(self): + """Make sure the SBAddressRangeList iterator works.""" + range1 = lldb.SBAddressRange(self.addr1, 8) + range2 = lldb.SBAddressRange(self.addr2, 16) + range3 = lldb.SBAddressRange(self.addr3, 32) + + range_list = lldb.SBAddressRangeList() + range_list.Append(range1) + range_list.Append(range2) + range_list.Append(range3) + self.assertEqual(range_list.GetSize(), 3) + + # Test the iterator + for range in range_list: + self.assertTrue(range.IsValid()) + + def test_address_range_print_invalid(self): + """Make sure the SBAddressRange can be printed when invalid.""" + range = lldb.SBAddressRange() + self.assertEqual(str(range), "") + + def test_address_range_print_resolved(self): + """Make sure the SBAddressRange can be printed when resolved.""" + lldb.target = self.target + error = lldb.SBError() + process = self.target.Launch(self.launch_info, error) + self.assertTrue(error.Success(), "Make sure process launched 
successfully") + self.assertTrue(process, PROCESS_IS_VALID) + self.assertState(process.GetState(), lldb.eStateStopped, PROCESS_STOPPED) + + loc = self.bp1.GetLocationAtIndex(0) + loc_addr = loc.GetAddress() + func = loc_addr.GetFunction() + range = func.GetRanges().GetAddressRangeAtIndex(0) + range_str = str(range) + # [0x1000-0x2000] // Resolved with target or addresses without sections + self.assertRegex(range_str, "^\[0x[0-9a-f]+\-0x[0-9a-f]+\)$") + process.Kill() + + def test_address_range_print_no_section_resolved(self): + """Make sure the SBAddressRange can be printed with no secion.""" + lldb.target = self.target + error = lldb.SBError() + process = self.target.Launch(self.launch_info, error) + self.assertTrue(error.Success(), "Make sure process launched successfully") + self.assertTrue(process, PROCESS_IS_VALID) + self.assertState(process.GetState(), lldb.eStateStopped, PROCESS_STOPPED) + + loc = self.bp1.GetLocationAtIndex(0) + loc_addr = loc.GetAddress() + func = loc_addr.GetFunction() + range = func.GetRanges().GetAddressRangeAtIndex(0) + + addr = lldb.SBAddress() + addr.SetAddress(lldb.SBSection(), range.GetBaseAddress().GetOffset()) + self.assertFalse(addr.GetSection().IsValid()) + range = lldb.SBAddressRange(addr, range.GetByteSize()) + + range_str = str(range) + # [0x1000-0x2000] // Resolved with target or addresses without sections + self.assertRegex(range_str, "^\[0x[0-9a-f]+\-0x[0-9a-f]+\)$") + process.Kill() + + def test_address_range_print_not_resolved(self): + """Make sure the SBAddressRange can be printed when not resolved.""" + range = lldb.SBAddressRange(self.addr1, 8) + range_str = str(range) + # a.out[0x1000-0x2000] // Without target + self.assertRegex(range_str, "^a.out\[0x[0-9a-f]+\-0x[0-9a-f]+\)$") + + def test_address_range_list_print(self): + """Make sure the SBAddressRangeList can be printed.""" + range1 = lldb.SBAddressRange(self.addr1, 8) + range2 = lldb.SBAddressRange(self.addr2, 16) + range3 = lldb.SBAddressRange(self.addr3, 32) 
+ self.dbg.SetAsync(True) + + range_list = lldb.SBAddressRangeList() + self.assertEqual(range_list.GetSize(), 0) + + range_list.Append(range1) + range_list.Append(range2) + range_list.Append(range3) + self.assertEqual(range_list.GetSize(), 3) + + range_list_str = str(range_list) + self.assertTrue(range_list_str.startswith("[")) + self.assertGreater(range_list_str.count(","), 1) + self.assertTrue(range_list_str.endswith("]")) + + def test_address_range_list_indexing(self): + """Make sure the SBAddressRangeList can be printed.""" + range1 = lldb.SBAddressRange(self.addr1, 8) + range2 = lldb.SBAddressRange(self.addr2, 16) + range_list = lldb.SBAddressRangeList() + range_list.Append(range1) + range_list.Append(range2) + + self.assertEqual(range_list.GetSize(), 2) + self.assertRaises(IndexError, lambda: range_list[2]) + self.assertRaises(TypeError, lambda: range_list["0"]) + self.assertEqual(range_list[0], range1) + self.assertEqual(range_list[1], range2) + self.assertEqual(range_list[-1], range2) + self.assertEqual(range_list[-2], range1) diff --git a/lldb/test/API/python_api/address_range/main.cpp b/lldb/test/API/python_api/address_range/main.cpp new file mode 100644 index 0000000000000..b6eaec4a23699 --- /dev/null +++ b/lldb/test/API/python_api/address_range/main.cpp @@ -0,0 +1,8 @@ +void foo() {} +void bar() {} + +int main() { + foo(); + bar(); + return 0; +} diff --git a/lldb/test/Shell/SymbolFile/DWARF/delayed-definition-die-searching.test b/lldb/test/Shell/SymbolFile/DWARF/delayed-definition-die-searching.test new file mode 100644 index 0000000000000..d253981b498c8 --- /dev/null +++ b/lldb/test/Shell/SymbolFile/DWARF/delayed-definition-die-searching.test @@ -0,0 +1,36 @@ +# Test definition DIE searching is delayed until complete type is required. 
+ +# UNSUPPORTED: system-windows + +# RUN: split-file %s %t +# RUN: %clangxx_host %t/main.cpp %t/t1_def.cpp -gdwarf -o %t.out +# RUN: %lldb -b %t.out -s %t/lldb.cmd | FileCheck %s + +# CHECK: (lldb) p v1 +# CHECK: DWARFASTParserClang::ParseTypeFromDWARF{{.*}}DW_TAG_structure_type (DW_TAG_structure_type) name = 't2' +# CHECK: DWARFASTParserClang::ParseTypeFromDWARF{{.*}}DW_TAG_structure_type (DW_TAG_structure_type) name = 't1' +# CHECK: DW_TAG_structure_type (DW_TAG_structure_type) 't2' resolving forward declaration... +# CHECK: (t2) {} +# CHECK: (lldb) p v2 +# CHECK: DWARFASTParserClang::ParseTypeFromDWARF{{.*}}DW_TAG_structure_type (DW_TAG_structure_type) name = 't1' +# CHECK: DW_TAG_structure_type (DW_TAG_structure_type) 't1' resolving forward declaration... + +#--- lldb.cmd +log enable dwarf comp +p v1 +p v2 + +#--- main.cpp +template +struct t2 { +}; +struct t1; +t2 v1; // this CU doesn't have definition DIE for t1, but only declaration DIE for it. +int main() { +} + +#--- t1_def.cpp +struct t1 { // this CU contains definition DIE for t1. 
+ int x; +}; +t1 v2; diff --git a/lldb/tools/lldb-dap/DAP.cpp b/lldb/tools/lldb-dap/DAP.cpp index c7eb3db4304a9..d419f821999e6 100644 --- a/lldb/tools/lldb-dap/DAP.cpp +++ b/lldb/tools/lldb-dap/DAP.cpp @@ -103,7 +103,9 @@ void DAP::SendJSON(const llvm::json::Value &json) { SendJSON(json_str); if (log) { - *log << "<-- " << std::endl + auto now = std::chrono::duration( + std::chrono::system_clock::now().time_since_epoch()); + *log << llvm::formatv("{0:f9} <-- ", now.count()).str() << std::endl << "Content-Length: " << json_str.size() << "\r\n\r\n" << llvm::formatv("{0:2}", json).str() << std::endl; } @@ -130,9 +132,12 @@ std::string DAP::ReadJSON() { if (!input.read_full(log.get(), length, json_str)) return json_str; - if (log) - *log << "--> " << std::endl << "Content-Length: " << length << "\r\n\r\n"; - + if (log) { + auto now = std::chrono::duration( + std::chrono::system_clock::now().time_since_epoch()); + *log << llvm::formatv("{0:f9} --> ", now.count()).str() << std::endl + << "Content-Length: " << length << "\r\n\r\n"; + } return json_str; } diff --git a/lldb/unittests/SymbolFile/DWARF/DWARFDIETest.cpp b/lldb/unittests/SymbolFile/DWARF/DWARFDIETest.cpp index 20742ea512309..bea07dfa27cc6 100644 --- a/lldb/unittests/SymbolFile/DWARF/DWARFDIETest.cpp +++ b/lldb/unittests/SymbolFile/DWARF/DWARFDIETest.cpp @@ -10,6 +10,8 @@ #include "Plugins/SymbolFile/DWARF/DWARFDebugInfo.h" #include "TestingSupport/Symbol/YAMLModuleTester.h" #include "lldb/Core/dwarf.h" +#include "lldb/Symbol/Type.h" +#include "lldb/lldb-private-enumerations.h" #include "llvm/ADT/STLExtras.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -187,3 +189,72 @@ TEST(DWARFDIETest, PeekName) { dw_offset_t fifth_die_offset = 26; EXPECT_EQ(unit->PeekDIEName(fifth_die_offset), "NameType2"); } + +TEST(DWARFDIETest, GetContext) { + const char *yamldata = R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_386 +DWARF: + debug_abbrev: + - ID: 0 + Table: + - 
Code: 0x1 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_language + Form: DW_FORM_data2 + - Code: 0x2 + Tag: DW_TAG_namespace + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_string + - Code: 0x3 + Tag: DW_TAG_structure_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_string + debug_info: + - Version: 4 + AddrSize: 8 + Entries: + - AbbrCode: 0x1 + Values: + - Value: 0x000000000000000C + - AbbrCode: 0x2 + Values: + - CStr: NAMESPACE + - AbbrCode: 0x3 + Values: + - CStr: STRUCT + - AbbrCode: 0x0 + - AbbrCode: 0x0 +)"; + + YAMLModuleTester t(yamldata); + auto *symbol_file = + llvm::cast(t.GetModule()->GetSymbolFile()); + DWARFUnit *unit = symbol_file->DebugInfo().GetUnitAtIndex(0); + ASSERT_TRUE(unit); + + auto make_namespace = [](llvm::StringRef name) { + return CompilerContext(CompilerContextKind::Namespace, ConstString(name)); + }; + auto make_struct = [](llvm::StringRef name) { + return CompilerContext(CompilerContextKind::Struct, ConstString(name)); + }; + DWARFDIE struct_die = unit->DIE().GetFirstChild().GetFirstChild(); + ASSERT_TRUE(struct_die); + EXPECT_THAT( + struct_die.GetDeclContext(), + testing::ElementsAre(make_namespace("NAMESPACE"), make_struct("STRUCT"))); + EXPECT_THAT( + struct_die.GetTypeLookupContext(), + testing::ElementsAre(make_namespace("NAMESPACE"), make_struct("STRUCT"))); +} diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 612e90abd4091..64898ab09772f 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -560,6 +560,8 @@ set(LLVM_USE_STATIC_ZSTD FALSE CACHE BOOL "Use static version of zstd. Can be TR set(LLVM_ENABLE_CURL "OFF" CACHE STRING "Use libcurl for the HTTP client if available. Can be ON, OFF, or FORCE_ON") +set(LLVM_HAS_LOGF128 "OFF" CACHE STRING "Use logf128 to constant fold fp128 logarithm calls. 
Can be ON, OFF, or FORCE_ON") + set(LLVM_ENABLE_HTTPLIB "OFF" CACHE STRING "Use cpp-httplib HTTP server library if available. Can be ON, OFF, or FORCE_ON") set(LLVM_Z3_INSTALL_DIR "" CACHE STRING "Install directory of the Z3 solver.") diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake index 8cfb36b0194e8..0aae13e30f2ab 100644 --- a/llvm/cmake/config-ix.cmake +++ b/llvm/cmake/config-ix.cmake @@ -247,6 +247,17 @@ else() set(HAVE_LIBEDIT 0) endif() +if(LLVM_HAS_LOGF128) + include(CheckCXXSymbolExists) + check_cxx_symbol_exists(logf128 math.h HAS_LOGF128) + + if(LLVM_HAS_LOGF128 STREQUAL FORCE_ON AND NOT HAS_LOGF128) + message(FATAL_ERROR "Failed to configure logf128") + endif() + + set(LLVM_HAS_LOGF128 "${HAS_LOGF128}") +endif() + # function checks check_symbol_exists(arc4random "stdlib.h" HAVE_DECL_ARC4RANDOM) find_package(Backtrace) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 1004956ac8f10..b827524e6b8db 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -64,7 +64,7 @@ to specify the target triple: Vendor Description ============ ============================================================== ``amd`` Can be used for all AMD GPU usage. - ``mesa3d`` Can be used if the OS is ``mesa3d``. + ``mesa`` Can be used if the OS is ``mesa3d``. ============ ============================================================== .. table:: AMDGPU Operating Systems diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 614dd98b013b3..7b64c477d13c7 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -4754,6 +4754,40 @@ reference to the CFI jump table in the ``LowerTypeTests`` pass. These constants may be useful in low-level programs, such as operating system kernels, which need to refer to the actual function body. +.. 
_ptrauth_constant: + +Pointer Authentication Constants +-------------------------------- + +``ptrauth (ptr CST, i32 KEY[, i64 DISC[, ptr ADDRDISC]?]?)`` + +A '``ptrauth``' constant represents a pointer with a cryptographic +authentication signature embedded into some bits, as described in the +`Pointer Authentication `__ document. + +A '``ptrauth``' constant is simply a constant equivalent to the +``llvm.ptrauth.sign`` intrinsic, potentially fed by a discriminator +``llvm.ptrauth.blend`` if needed. + +Its type is the same as the first argument. An integer constant discriminator +and an address discriminator may be optionally specified. Otherwise, they have +values ``i64 0`` and ``ptr null``. + +If the address discriminator is ``null`` then the expression is equivalent to + +.. code-block:: llvm + + %tmp = call i64 @llvm.ptrauth.sign(i64 ptrtoint (ptr CST to i64), i32 KEY, i64 DISC) + %val = inttoptr i64 %tmp to ptr + +Otherwise, the expression is equivalent to: + +.. code-block:: llvm + + %tmp1 = call i64 @llvm.ptrauth.blend(i64 ptrtoint (ptr ADDRDISC to i64), i64 DISC) + %tmp2 = call i64 @llvm.ptrauth.sign(i64 ptrtoint (ptr CST to i64), i32 KEY, i64 %tmp1) + %val = inttoptr i64 %tmp2 to ptr + .. _constantexprs: Constant Expressions diff --git a/llvm/docs/PointerAuth.md b/llvm/docs/PointerAuth.md index a8d2b4d8f5f0b..cf2cc6305f130 100644 --- a/llvm/docs/PointerAuth.md +++ b/llvm/docs/PointerAuth.md @@ -16,6 +16,7 @@ For more details, see the clang documentation page for At the IR level, it is represented using: * a [set of intrinsics](#intrinsics) (to sign/authenticate pointers) +* a [signed pointer constant](#constant) (to sign globals) * a [call operand bundle](#operand-bundle) (to authenticate called pointers) The current implementation leverages the @@ -225,6 +226,27 @@ with a pointer address discriminator, in a way that is specified by the target implementation. 
+### Constant + +[Intrinsics](#intrinsics) can be used to produce signed pointers dynamically, +in code, but not for signed pointers referenced by constants, in, e.g., global +initializers. + +The latter are represented using a +[``ptrauth`` constant](https://llvm.org/docs/LangRef.html#ptrauth-constant), +which describes an authenticated relocation producing a signed pointer. + +```llvm +ptrauth (ptr CST, i32 KEY, i64 DISC, ptr ADDRDISC) +``` + +is equivalent to: + +```llvm + %disc = call i64 @llvm.ptrauth.blend(i64 ptrtoint(ptr ADDRDISC to i64), i64 DISC) + %signedval = call i64 @llvm.ptrauth.sign(ptr CST, i32 KEY, i64 %disc) +``` + ### Operand Bundle Function pointers used as indirect call targets can be signed when materialized, diff --git a/llvm/docs/SPIRVUsage.rst b/llvm/docs/SPIRVUsage.rst index 657b0fb9b6724..de27f6b2372db 100644 --- a/llvm/docs/SPIRVUsage.rst +++ b/llvm/docs/SPIRVUsage.rst @@ -141,10 +141,16 @@ list of supported SPIR-V extensions, sorted alphabetically by their extension na - Allows generating arbitrary width integer types. * - ``SPV_INTEL_bfloat16_conversion`` - Adds instructions to convert between single-precision 32-bit floating-point values and 16-bit bfloat16 values. + * - ``SPV_INTEL_cache_controls`` + - Allows cache control information to be applied to memory access instructions. * - ``SPV_INTEL_function_pointers`` - Allows translation of function pointers. * - ``SPV_INTEL_inline_assembly`` - Allows to use inline assembly. + * - ``SPV_INTEL_global_variable_host_access`` + - Adds decorations that can be applied to global (module scope) variables. + * - ``SPV_INTEL_global_variable_fpga_decorations`` + - Adds decorations that can be applied to global (module scope) variables to help code generation for FPGA devices. * - ``SPV_INTEL_optnone`` - Adds OptNoneINTEL value for Function Control mask that indicates a request to not optimize the function. 
* - ``SPV_INTEL_subgroups`` diff --git a/llvm/docs/Security.rst b/llvm/docs/Security.rst index 9140923e5e8c9..a468ff51d2a6a 100644 --- a/llvm/docs/Security.rst +++ b/llvm/docs/Security.rst @@ -55,6 +55,7 @@ username for an individual isn't available, the brackets will be empty. * Serge Guelton (Mozilla) [@serge-sans-paille] * Shayne Hiet-Block (Microsoft) [@GreatKeeper] * Tim Penge (Sony) [] +* Tulio Magno Quites Machado Filho (Red Hat) [@tuliom] * Will Huhn (Intel) [@wphuhn-intel] Criteria diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index deb74cb2fdeb1..44a301ecc9928 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -19,6 +19,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FloatingPointMode.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/float128.h" #include #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \ @@ -354,6 +355,9 @@ class IEEEFloat final : public APFloatBase { Expected convertFromString(StringRef, roundingMode); APInt bitcastToAPInt() const; double convertToDouble() const; +#ifdef HAS_IEE754_FLOAT128 + float128 convertToQuad() const; +#endif float convertToFloat() const; /// @} @@ -1218,6 +1222,15 @@ class APFloat : public APFloatBase { /// shorter semantics, like IEEEsingle and others. double convertToDouble() const; + /// Converts this APFloat to host float value. + /// + /// \pre The APFloat must be built using semantics, that can be represented by + /// the host float type without loss of precision. It can be IEEEquad and + /// shorter semantics, like IEEEdouble and others. +#ifdef HAS_IEE754_FLOAT128 + float128 convertToQuad() const; +#endif + /// Converts this APFloat to host float value. 
/// /// \pre The APFloat must be built using semantics, that can be represented by diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index 2fd8b7ea636c4..6cfa6ec665084 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -17,6 +17,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/float128.h" #include #include #include @@ -1677,6 +1678,13 @@ class [[nodiscard]] APInt { /// any bit width. Exactly 64 bits will be translated. double bitsToDouble() const { return llvm::bit_cast(getWord(0)); } +#ifdef HAS_IEE754_FLOAT128 + float128 bitsToQuad() const { + __uint128_t ul = ((__uint128_t)U.pVal[1] << 64) + U.pVal[0]; + return llvm::bit_cast(ul); + } +#endif + /// Converts APInt bits to a float /// /// The conversion does not do a translation from integer to float, it just diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 5828cc156cc78..72f3d94542496 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -912,6 +912,13 @@ class ScalarEvolution { return getBackedgeTakenCount(L, SymbolicMaximum); } + /// Similar to getSymbolicMaxBackedgeTakenCount, except it will add a set of + /// SCEV predicates to Predicates that are required to be true in order for + /// the answer to be correct. Predicates can be checked with run-time + /// checks and can be used to perform loop versioning. + const SCEV *getPredicatedSymbolicMaxBackedgeTakenCount( + const Loop *L, SmallVector &Predicates); + /// Return true if the backedge taken count is either the value returned by /// getConstantMaxBackedgeTakenCount or zero. bool isBackedgeTakenCountMaxOrZero(const Loop *L); @@ -1549,7 +1556,9 @@ class ScalarEvolution { ScalarEvolution *SE) const; /// Get the symbolic max backedge taken count for the loop. 
- const SCEV *getSymbolicMax(const Loop *L, ScalarEvolution *SE); + const SCEV * + getSymbolicMax(const Loop *L, ScalarEvolution *SE, + SmallVector *Predicates = nullptr); /// Get the symbolic max backedge taken count for the particular loop exit. const SCEV *getSymbolicMax(const BasicBlock *ExitingBlock, @@ -1746,7 +1755,7 @@ class ScalarEvolution { /// Similar to getBackedgeTakenInfo, but will add predicates as required /// with the purpose of returning complete information. - const BackedgeTakenInfo &getPredicatedBackedgeTakenInfo(const Loop *L); + BackedgeTakenInfo &getPredicatedBackedgeTakenInfo(const Loop *L); /// Compute the number of times the specified loop will iterate. /// If AllowPredicates is set, we will create new SCEV predicates as @@ -1761,11 +1770,6 @@ class ScalarEvolution { ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, bool AllowPredicates = false); - /// Return a symbolic upper bound for the backedge taken count of the loop. - /// This is more general than getConstantMaxBackedgeTakenCount as it returns - /// an arbitrary expression as opposed to only constants. - const SCEV *computeSymbolicMaxBackedgeTakenCount(const Loop *L); - // Helper functions for computeExitLimitFromCond to avoid exponential time // complexity. @@ -2316,6 +2320,9 @@ class PredicatedScalarEvolution { /// Get the (predicated) backedge count for the analyzed loop. const SCEV *getBackedgeTakenCount(); + /// Get the (predicated) symbolic max backedge count for the analyzed loop. + const SCEV *getSymbolicMaxBackedgeTakenCount(); + /// Adds a new predicate. void addPredicate(const SCEVPredicate &Pred); @@ -2384,6 +2391,9 @@ class PredicatedScalarEvolution { /// The backedge taken count. const SCEV *BackedgeCount = nullptr; + + /// The symbolic backedge taken count. 
+ const SCEV *SymbolicMaxBackedgeCount = nullptr; }; template <> struct DenseMapInfo { diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index df61ec6ed30e0..69821c22dcd61 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -346,6 +346,7 @@ enum Kind { kw_blockaddress, kw_dso_local_equivalent, kw_no_cfi, + kw_ptrauth, kw_freeze, diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h index 38ef8e37df91d..acf89885af6fd 100644 --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -58,15 +58,16 @@ enum : unsigned { WASM_TYPE_V128 = 0x7B, WASM_TYPE_NULLFUNCREF = 0x73, WASM_TYPE_NULLEXTERNREF = 0x72, + WASM_TYPE_NULLEXNREF = 0x74, WASM_TYPE_NULLREF = 0x71, WASM_TYPE_FUNCREF = 0x70, WASM_TYPE_EXTERNREF = 0x6F, + WASM_TYPE_EXNREF = 0x69, WASM_TYPE_ANYREF = 0x6E, WASM_TYPE_EQREF = 0x6D, WASM_TYPE_I31REF = 0x6C, WASM_TYPE_STRUCTREF = 0x6B, WASM_TYPE_ARRAYREF = 0x6A, - WASM_TYPE_EXNREF = 0x69, WASM_TYPE_NONNULLABLE = 0x64, WASM_TYPE_NULLABLE = 0x63, WASM_TYPE_FUNC = 0x60, @@ -261,8 +262,9 @@ enum class ValType { V128 = WASM_TYPE_V128, FUNCREF = WASM_TYPE_FUNCREF, EXTERNREF = WASM_TYPE_EXTERNREF, + EXNREF = WASM_TYPE_EXNREF, // Unmodeled value types include ref types with heap types other than - // func or extern, and type-specialized funcrefs + // func, extern or exn, and type-specialized funcrefs OTHERREF = 0xff, }; @@ -410,7 +412,8 @@ struct WasmDataSegment { // 1) Does not model passive or declarative segments (Segment will end up with // an Offset field of i32.const 0) // 2) Does not model init exprs (Segment will get an empty Functions list) -// 2) Does not model types other than basic funcref/externref (see ValType) +// 3) Does not model types other than basic funcref/externref/exnref (see +// ValType) struct WasmElemSegment { uint32_t Flags; uint32_t TableNumber; diff --git 
a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index d3b9e96520f88..9999aee61528e 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -413,6 +413,7 @@ enum ConstantsCodes { // asmstr,conststr] CST_CODE_CE_GEP_WITH_INRANGE = 31, // [opty, flags, range, n x operands] CST_CODE_CE_GEP = 32, // [opty, flags, n x operands] + CST_CODE_PTRAUTH = 33, // [ptr, key, disc, addrdisc] }; /// CastOpcodes - These are values used in the bitcode files to encode which diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 96a6270690468..0dc237301abb4 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1241,11 +1241,11 @@ class SelectionDAG { /// Helper function to make it easier to build Select's if you just have /// operands and don't want to check for vector. SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, - SDValue RHS) { + SDValue RHS, SDNodeFlags Flags = SDNodeFlags()) { assert(LHS.getValueType() == VT && RHS.getValueType() == VT && "Cannot use select on differing types"); auto Opcode = Cond.getValueType().isVector() ? 
ISD::VSELECT : ISD::SELECT; - return getNode(Opcode, DL, VT, Cond, LHS, RHS); + return getNode(Opcode, DL, VT, Cond, LHS, RHS, Flags); } /// Helper function to make it easier to build SelectCC's if you just have an diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td index c3e378ed8f6ed..e322cc04c1c76 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -280,11 +280,12 @@ def untyped : ValueType<8, 193> { // Produces an untyped value } def funcref : ValueType<0, 194>; // WebAssembly's funcref type def externref : ValueType<0, 195>; // WebAssembly's externref type -def x86amx : ValueType<8192, 196>; // X86 AMX value -def i64x8 : ValueType<512, 197>; // 8 Consecutive GPRs (AArch64) +def exnref : ValueType<0, 196>; // WebAssembly's exnref type +def x86amx : ValueType<8192, 197>; // X86 AMX value +def i64x8 : ValueType<512, 198>; // 8 Consecutive GPRs (AArch64) def aarch64svcount - : ValueType<16, 198>; // AArch64 predicate-as-counter -def spirvbuiltin : ValueType<0, 199>; // SPIR-V's builtin type + : ValueType<16, 199>; // AArch64 predicate-as-counter +def spirvbuiltin : ValueType<0, 200>; // SPIR-V's builtin type def token : ValueType<0, 248>; // TokenTy def MetadataVT : ValueType<0, 249> { // Metadata diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake index 6605ea60df99e..629977cc11d68 100644 --- a/llvm/include/llvm/Config/llvm-config.h.cmake +++ b/llvm/include/llvm/Config/llvm-config.h.cmake @@ -198,4 +198,7 @@ /* Define if plugins enabled */ #cmakedefine LLVM_ENABLE_PLUGINS +/* Define if logf128 is available */ +#cmakedefine LLVM_HAS_LOGF128 + #endif diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index 3fa27608ead94..3feb4bd11c998 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ 
b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -371,9 +371,8 @@ ConstructDecompositionT::addClauseSymsToMap(U &&item, // anything and return false, otherwise return true. template bool ConstructDecompositionT::applyToUnique(const ClauseTy *node) { - auto unique = detail::find_unique(leafs, [=](const auto &dirInfo) { - return llvm::omp::isAllowedClauseForDirective(dirInfo.id, node->id, - version); + auto unique = detail::find_unique(leafs, [=](const auto &leaf) { + return llvm::omp::isAllowedClauseForDirective(leaf.id, node->id, version); }); if (unique != leafs.end()) { @@ -438,8 +437,8 @@ bool ConstructDecompositionT::applyToAll(const ClauseTy *node) { } template -template -bool ConstructDecompositionT::applyClause(Clause &&clause, +template +bool ConstructDecompositionT::applyClause(Specific &&specific, const ClauseTy *node) { // The default behavior is to find the unique directive to which the // given clause may be applied. If there are no such directives, or diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h index a1e5005a9d1da..86f6be7985a23 100644 --- a/llvm/include/llvm/IR/Constants.h +++ b/llvm/include/llvm/IR/Constants.h @@ -1008,6 +1008,72 @@ struct OperandTraits : public FixedNumOperandTraits { DEFINE_TRANSPARENT_OPERAND_ACCESSORS(NoCFIValue, Value) +/// A signed pointer, in the ptrauth sense. +class ConstantPtrAuth final : public Constant { + friend struct ConstantPtrAuthKeyType; + friend class Constant; + + ConstantPtrAuth(Constant *Ptr, ConstantInt *Key, ConstantInt *Disc, + Constant *AddrDisc); + + void *operator new(size_t s) { return User::operator new(s, 4); } + + void destroyConstantImpl(); + Value *handleOperandChangeImpl(Value *From, Value *To); + +public: + /// Return a pointer signed with the specified parameters. 
+ static ConstantPtrAuth *get(Constant *Ptr, ConstantInt *Key, + ConstantInt *Disc, Constant *AddrDisc); + + /// Produce a new ptrauth expression signing the given value using + /// the same schema as is stored in one. + ConstantPtrAuth *getWithSameSchema(Constant *Pointer) const; + + /// Transparently provide more efficient getOperand methods. + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); + + /// The pointer that is signed in this ptrauth signed pointer. + Constant *getPointer() const { return cast(Op<0>().get()); } + + /// The Key ID, an i32 constant. + ConstantInt *getKey() const { return cast(Op<1>().get()); } + + /// The integer discriminator, an i64 constant, or 0. + ConstantInt *getDiscriminator() const { + return cast(Op<2>().get()); + } + + /// The address discriminator if any, or the null constant. + /// If present, this must be a value equivalent to the storage location of + /// the only global-initializer user of the ptrauth signed pointer. + Constant *getAddrDiscriminator() const { + return cast(Op<3>().get()); + } + + /// Whether there is any non-null address discriminator. + bool hasAddressDiscriminator() const { + return !getAddrDiscriminator()->isNullValue(); + } + + /// Check whether an authentication operation with key \p Key and (possibly + /// blended) discriminator \p Discriminator is known to be compatible with + /// this ptrauth signed pointer. + bool isKnownCompatibleWith(const Value *Key, const Value *Discriminator, + const DataLayout &DL) const; + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const Value *V) { + return V->getValueID() == ConstantPtrAuthVal; + } +}; + +template <> +struct OperandTraits + : public FixedNumOperandTraits {}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantPtrAuth, Constant) + //===----------------------------------------------------------------------===// /// A constant value that is initialized with an expression using /// other constant values. 
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 3019f68083d42..107442623ab7b 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -581,6 +581,7 @@ def llvm_vararg_ty : LLVMType; // this means vararg here def llvm_externref_ty : LLVMType; def llvm_funcref_ty : LLVMType; +def llvm_exnref_ty : LLVMType; //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 4544cf35fb7b3..9a71aaa9f4434 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3121,6 +3121,11 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[llvm_nxv8bf16_ty], [llvm_nxv4f32_ty, llvm_nxv4f32_ty], [IntrNoMem]>; + + class SME2_CVT_WIDENING_VG2_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], + [LLVMSubdivide2VectorType<0>], [IntrNoMem]>; + class SME2_CVT_VG4_SINGLE_Intrinsic : DefaultAttrsIntrinsic<[LLVMSubdivide4VectorType<0>], @@ -3356,6 +3361,12 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sve_bfmlslt : SME2_BFMLS_Intrinsic; def int_aarch64_sve_bfmlslt_lane : SME2_BFMLS_Lane_Intrinsic; + // Multi-vector zeroing + + foreach vg = ["vg1x2", "vg1x4", "vg2x1", "vg2x2", "vg2x4", "vg4x1", "vg4x2", "vg4x4"] in { + def int_aarch64_sme_zero_za64_ # vg : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects]>; + } + // Multi-vector signed saturating doubling multiply high def int_aarch64_sve_sqdmulh_single_vgx2 : SME2_VG2_Multi_Single_Intrinsic; @@ -3412,6 +3423,13 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_suvdot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic; def int_aarch64_sme_usvdot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic; + + // + //Multi-vector floating-point convert from half-precision to deinterleaved single-precision. 
+ // + + def int_aarch64_sve_fcvtl_widen_x2 : SME2_CVT_WIDENING_VG2_Intrinsic; + // // Multi-vector floating-point CVT from single-precision to interleaved half-precision/BFloat16 // @@ -3431,7 +3449,7 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sve_fcvtzu_x4 : SME2_CVT_X4_Intrinsic; def int_aarch64_sve_scvtf_x4 : SME2_CVT_X4_Intrinsic; def int_aarch64_sve_ucvtf_x4 : SME2_CVT_X4_Intrinsic; - + def int_aarch64_sve_fcvt_widen_x2 : SME2_CVT_WIDENING_VG2_Intrinsic; // // Multi-vector saturating extract narrow // @@ -3472,10 +3490,12 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sve_sclamp_single_x2 : SME2_VG2_Multi_Single_Single_Intrinsic; def int_aarch64_sve_uclamp_single_x2 : SME2_VG2_Multi_Single_Single_Intrinsic; def int_aarch64_sve_fclamp_single_x2 : SME2_VG2_Multi_Single_Single_Intrinsic; + def int_aarch64_sve_bfclamp_single_x2 : SME2_VG2_Multi_Single_Single_Intrinsic; def int_aarch64_sve_sclamp_single_x4 : SME2_VG4_Multi_Single_Single_Intrinsic; def int_aarch64_sve_uclamp_single_x4 : SME2_VG4_Multi_Single_Single_Intrinsic; def int_aarch64_sve_fclamp_single_x4 : SME2_VG4_Multi_Single_Single_Intrinsic; + def int_aarch64_sve_bfclamp_single_x4 : SME2_VG4_Multi_Single_Single_Intrinsic; // // Multi-vector add/sub and accumulate into ZA diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index 237f268784bb0..47aab196a6d4f 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -31,12 +31,17 @@ def int_wasm_ref_null_extern : DefaultAttrsIntrinsic<[llvm_externref_ty], [], [IntrNoMem]>; def int_wasm_ref_null_func : DefaultAttrsIntrinsic<[llvm_funcref_ty], [], [IntrNoMem]>; +def int_wasm_ref_null_exn: + DefaultAttrsIntrinsic<[llvm_exnref_ty], [], [IntrNoMem]>; def int_wasm_ref_is_null_extern : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_externref_ty], [IntrNoMem], "llvm.wasm.ref.is_null.extern">; def int_wasm_ref_is_null_func : 
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_funcref_ty], [IntrNoMem], "llvm.wasm.ref.is_null.func">; +def int_wasm_ref_is_null_exn : + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_exnref_ty], [IntrNoMem], + "llvm.wasm.ref.is_null.exn">; //===----------------------------------------------------------------------===// // Table intrinsics @@ -47,6 +52,9 @@ def int_wasm_table_set_externref : def int_wasm_table_set_funcref : DefaultAttrsIntrinsic<[], [llvm_table_ty, llvm_i32_ty, llvm_funcref_ty], [IntrWriteMem]>; +def int_wasm_table_set_exnref : + DefaultAttrsIntrinsic<[], [llvm_table_ty, llvm_i32_ty, llvm_exnref_ty], + [IntrWriteMem]>; def int_wasm_table_get_externref : DefaultAttrsIntrinsic<[llvm_externref_ty], [llvm_table_ty, llvm_i32_ty], @@ -54,6 +62,9 @@ def int_wasm_table_get_externref : def int_wasm_table_get_funcref : DefaultAttrsIntrinsic<[llvm_funcref_ty], [llvm_table_ty, llvm_i32_ty], [IntrReadMem]>; +def int_wasm_table_get_exnref : + DefaultAttrsIntrinsic<[llvm_exnref_ty], [llvm_table_ty, llvm_i32_ty], + [IntrReadMem]>; // Query the current table size, and increase the current table size. 
def int_wasm_table_size : @@ -68,6 +79,9 @@ def int_wasm_table_grow_externref : def int_wasm_table_grow_funcref : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_table_ty, llvm_funcref_ty, llvm_i32_ty], []>; +def int_wasm_table_grow_exnref : + DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_table_ty, llvm_exnref_ty, llvm_i32_ty], []>; def int_wasm_table_fill_externref : DefaultAttrsIntrinsic<[], [llvm_table_ty, llvm_i32_ty, llvm_externref_ty, @@ -76,6 +90,10 @@ def int_wasm_table_fill_funcref : DefaultAttrsIntrinsic<[], [llvm_table_ty, llvm_i32_ty, llvm_funcref_ty, llvm_i32_ty], []>; +def int_wasm_table_fill_exnref : + DefaultAttrsIntrinsic<[], + [llvm_table_ty, llvm_i32_ty, llvm_exnref_ty, + llvm_i32_ty], []>; //===----------------------------------------------------------------------===// // Trapping float-to-int conversions diff --git a/llvm/include/llvm/IR/Value.def b/llvm/include/llvm/IR/Value.def index 61f7a87666d09..3ece66a529e12 100644 --- a/llvm/include/llvm/IR/Value.def +++ b/llvm/include/llvm/IR/Value.def @@ -81,6 +81,7 @@ HANDLE_CONSTANT(BlockAddress) HANDLE_CONSTANT(ConstantExpr) HANDLE_CONSTANT_EXCLUDE_LLVM_C_API(DSOLocalEquivalent) HANDLE_CONSTANT_EXCLUDE_LLVM_C_API(NoCFIValue) +HANDLE_CONSTANT_EXCLUDE_LLVM_C_API(ConstantPtrAuth) // ConstantAggregate. HANDLE_CONSTANT(ConstantArray) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 66a99f16cdb63..d44a2d1e2fb11 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -28,10 +28,12 @@ enum IndexedVersion : uint64_t { Version1 = 1, // Version 2: Added a call stack table. Version2 = 2, + // Version 3: Under development. + Version3 = 3, }; constexpr uint64_t MinimumSupportedVersion = Version0; -constexpr uint64_t MaximumSupportedVersion = Version2; +constexpr uint64_t MaximumSupportedVersion = Version3; // Verify that the minimum and maximum satisfy the obvious constraint. 
static_assert(MinimumSupportedVersion <= MaximumSupportedVersion); @@ -426,8 +428,8 @@ struct IndexedMemProfRecord { // Convert IndexedMemProfRecord to MemProfRecord. Callback is used to // translate CallStackId to call stacks with frames inline. MemProfRecord toMemProfRecord( - llvm::function_ref(const CallStackId)> - Callback) const; + llvm::function_ref(const CallStackId)> Callback) + const; // Returns the GUID for the function name after canonicalization. For // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are diff --git a/llvm/include/llvm/Support/float128.h b/llvm/include/llvm/Support/float128.h new file mode 100644 index 0000000000000..e15a98dc5a677 --- /dev/null +++ b/llvm/include/llvm/Support/float128.h @@ -0,0 +1,26 @@ +//===-- llvm/Support/float128.h - Compiler abstraction support --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FLOAT128 +#define LLVM_FLOAT128 + +namespace llvm { + +#if defined(__clang__) && defined(__FLOAT128__) && \ + defined(__SIZEOF_INT128__) && !defined(__LONG_DOUBLE_IBM128__) +#define HAS_IEE754_FLOAT128 +typedef __float128 float128; +#elif defined(__FLOAT128__) && defined(__SIZEOF_INT128__) && \ + !defined(__LONG_DOUBLE_IBM128__) && \ + (defined(__GNUC__) || defined(__GNUG__)) +#define HAS_IEE754_FLOAT128 +typedef _Float128 float128; +#endif + +} // namespace llvm +#endif // LLVM_FLOAT128 diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index b3fff3c99025a..5025ab2491de8 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -183,55 +183,8 @@ struct ExtensionDependency { ArchExtKind Later; }; 
-// clang-format off -// Each entry here is a link in the dependency chain starting from the -// extension that was added to the architecture first. -inline constexpr ExtensionDependency ExtensionDependencies[] = { - {AEK_FP, AEK_FP16}, - {AEK_FP, AEK_SIMD}, - {AEK_FP, AEK_JSCVT}, - {AEK_FP, AEK_FP8}, - {AEK_SIMD, AEK_CRYPTO}, - {AEK_SIMD, AEK_AES}, - {AEK_SIMD, AEK_SHA2}, - {AEK_SIMD, AEK_SHA3}, - {AEK_SIMD, AEK_SM4}, - {AEK_SIMD, AEK_RDM}, - {AEK_SIMD, AEK_DOTPROD}, - {AEK_SIMD, AEK_FCMA}, - {AEK_FP16, AEK_FP16FML}, - {AEK_FP16, AEK_SVE}, - {AEK_BF16, AEK_SME}, - {AEK_BF16, AEK_B16B16}, - {AEK_SVE, AEK_SVE2}, - {AEK_SVE, AEK_F32MM}, - {AEK_SVE, AEK_F64MM}, - {AEK_SVE2, AEK_SVE2P1}, - {AEK_SVE2, AEK_SVE2BITPERM}, - {AEK_SVE2, AEK_SVE2AES}, - {AEK_SVE2, AEK_SVE2SHA3}, - {AEK_SVE2, AEK_SVE2SM4}, - {AEK_SVE2, AEK_SMEFA64}, - {AEK_SVE2, AEK_SMEFA64}, - {AEK_SME, AEK_SME2}, - {AEK_SME, AEK_SMEF16F16}, - {AEK_SME, AEK_SMEF64F64}, - {AEK_SME, AEK_SMEI16I64}, - {AEK_SME, AEK_SMEFA64}, - {AEK_SME2, AEK_SME2P1}, - {AEK_SME2, AEK_SSVE_FP8FMA}, - {AEK_SME2, AEK_SSVE_FP8DOT2}, - {AEK_SME2, AEK_SSVE_FP8DOT4}, - {AEK_SME2, AEK_SMEF8F16}, - {AEK_SME2, AEK_SMEF8F32}, - {AEK_FP8, AEK_SMEF8F16}, - {AEK_FP8, AEK_SMEF8F32}, - {AEK_LSE, AEK_LSE128}, - {AEK_PREDRES, AEK_SPECRES2}, - {AEK_RAS, AEK_RASV2}, - {AEK_RCPC, AEK_RCPC3}, -}; -// clang-format on +#define EMIT_EXTENSION_DEPENDENCIES +#include "llvm/TargetParser/AArch64TargetParserDef.inc" enum ArchProfile { AProfile = 'A', RProfile = 'R', InvalidProfile = '?' 
}; diff --git a/llvm/include/llvm/Transforms/Scalar/Reassociate.h b/llvm/include/llvm/Transforms/Scalar/Reassociate.h index f3a2e0f4380eb..84d72df6fc4d8 100644 --- a/llvm/include/llvm/Transforms/Scalar/Reassociate.h +++ b/llvm/include/llvm/Transforms/Scalar/Reassociate.h @@ -63,6 +63,16 @@ struct Factor { Factor(Value *Base, unsigned Power) : Base(Base), Power(Power) {} }; +struct OverflowTracking { + bool HasNUW; + bool HasNSW; + bool AllKnownNonNegative; + // Note: AllKnownNonNegative can be true in a case where one of the operands + // is negative, but one of the operators is not NSW. AllKnownNonNegative should + // not be used independently of HasNSW + OverflowTracking() : HasNUW(true), HasNSW(true), AllKnownNonNegative(true) {} +}; + class XorOpnd; } // end namespace reassociate @@ -103,7 +113,7 @@ class ReassociatePass : public PassInfoMixin { void ReassociateExpression(BinaryOperator *I); void RewriteExprTree(BinaryOperator *I, SmallVectorImpl &Ops, - bool HasNUW); + reassociate::OverflowTracking Flags); Value *OptimizeExpression(BinaryOperator *I, SmallVectorImpl &Ops); Value *OptimizeAdd(Instruction *I, diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 474b8d20fde16..74476cb5440c6 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -159,3 +159,9 @@ add_llvm_component_library(LLVMAnalysis Support TargetParser ) + +include(CheckCXXSymbolExists) +check_cxx_symbol_exists(logf128 math.h HAS_LOGF128) +if(HAS_LOGF128) + target_compile_definitions(LLVMAnalysis PRIVATE HAS_LOGF128) +endif() diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 705377b97ed90..5febe917126b1 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -2087,6 +2087,17 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, if (IntrinsicID == Intrinsic::canonicalize) return constantFoldCanonicalize(Ty, Call, U); +#if
defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128) + if (Ty->isFP128Ty()) { + switch (IntrinsicID) { + default: + return nullptr; + case Intrinsic::log: + return ConstantFP::get(Ty, logf128(Op->getValueAPF().convertToQuad())); + } + } +#endif + if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) return nullptr; diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index bc8b9b8479e4f..bd4c2a35ebf2c 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1983,20 +1983,25 @@ getDependenceDistanceStrideAndSize( return MemoryDepChecker::Dependence::IndirectUnsafe; // Check if we can prove that Sink only accesses memory after Src's end or - // vice versa. - const auto &[SrcStart, SrcEnd] = - getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE); - const auto &[SinkStart, SinkEnd] = - getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE); - - if (!isa(SrcStart) && - !isa(SrcEnd) && - !isa(SinkStart) && - !isa(SinkEnd)) { - if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart)) - return MemoryDepChecker::Dependence::NoDep; - if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart)) - return MemoryDepChecker::Dependence::NoDep; + // vice versa. At the moment this is limited to cases where either source or + // sink are loop invariant to avoid compile-time increases. This is not + // required for correctness. 
+ if (SE.isLoopInvariant(Src, InnermostLoop) || + SE.isLoopInvariant(Sink, InnermostLoop)) { + const auto &[SrcStart, SrcEnd] = + getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE); + const auto &[SinkStart, SinkEnd] = + getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE); + + if (!isa(SrcStart) && + !isa(SrcEnd) && + !isa(SinkStart) && + !isa(SinkEnd)) { + if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart)) + return MemoryDepChecker::Dependence::NoDep; + if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart)) + return MemoryDepChecker::Dependence::NoDep; + } } // Need accesses with constant strides and the same direction. We don't want diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 8d971e6a78e42..e46d7183a2a35 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -8295,6 +8295,11 @@ const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L, llvm_unreachable("Invalid ExitCountKind!"); } +const SCEV *ScalarEvolution::getPredicatedSymbolicMaxBackedgeTakenCount( + const Loop *L, SmallVector &Preds) { + return getPredicatedBackedgeTakenInfo(L).getSymbolicMax(L, this, &Preds); +} + bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) { return getBackedgeTakenInfo(L).isConstantMaxOrZero(this); } @@ -8311,7 +8316,7 @@ static void PushLoopPHIs(const Loop *L, Worklist.push_back(&PN); } -const ScalarEvolution::BackedgeTakenInfo & +ScalarEvolution::BackedgeTakenInfo & ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) { auto &BTI = getBackedgeTakenInfo(L); if (BTI.hasFullInfo()) @@ -8644,11 +8649,37 @@ ScalarEvolution::BackedgeTakenInfo::getConstantMax(ScalarEvolution *SE) const { return getConstantMax(); } -const SCEV * -ScalarEvolution::BackedgeTakenInfo::getSymbolicMax(const Loop *L, - ScalarEvolution *SE) { - if (!SymbolicMax) - SymbolicMax = SE->computeSymbolicMaxBackedgeTakenCount(L); +const SCEV 
*ScalarEvolution::BackedgeTakenInfo::getSymbolicMax( + const Loop *L, ScalarEvolution *SE, + SmallVector *Predicates) { + if (!SymbolicMax) { + // Form an expression for the maximum exit count possible for this loop. We + // merge the max and exact information to approximate a version of + // getConstantMaxBackedgeTakenCount which isn't restricted to just + // constants. + SmallVector ExitCounts; + + for (const auto &ENT : ExitNotTaken) { + const SCEV *ExitCount = ENT.SymbolicMaxNotTaken; + if (!isa(ExitCount)) { + assert(SE->DT.dominates(ENT.ExitingBlock, L->getLoopLatch()) && + "We should only have known counts for exiting blocks that " + "dominate latch!"); + ExitCounts.push_back(ExitCount); + if (Predicates) + for (const auto *P : ENT.Predicates) + Predicates->push_back(P); + + assert((Predicates || ENT.hasAlwaysTruePredicate()) && + "Predicate should be always true!"); + } + } + if (ExitCounts.empty()) + SymbolicMax = SE->getCouldNotCompute(); + else + SymbolicMax = + SE->getUMinFromMismatchedTypes(ExitCounts, /*Sequential*/ true); + } return SymbolicMax; } @@ -13589,6 +13620,24 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, P->print(OS, 4); } + Preds.clear(); + auto *PredSymbolicMax = + SE->getPredicatedSymbolicMaxBackedgeTakenCount(L, Preds); + if (SymbolicBTC != PredSymbolicMax) { + OS << "Loop "; + L->getHeader()->printAsOperand(OS, /*PrintType=*/false); + OS << ": "; + if (!isa(PredSymbolicMax)) { + OS << "Predicated symbolic max backedge-taken count is "; + PrintSCEVWithTypeHint(OS, PredSymbolicMax); + } else + OS << "Unpredictable predicated symbolic max backedge-taken count."; + OS << "\n"; + OS << " Predicates:\n"; + for (const auto *P : Preds) + P->print(OS, 4); + } + if (SE->hasLoopInvariantBackedgeTakenCount(L)) { OS << "Loop "; L->getHeader()->printAsOperand(OS, /*PrintType=*/false); @@ -14802,6 +14851,17 @@ const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() { return BackedgeCount; } +const SCEV 
*PredicatedScalarEvolution::getSymbolicMaxBackedgeTakenCount() { + if (!SymbolicMaxBackedgeCount) { + SmallVector Preds; + SymbolicMaxBackedgeCount = + SE.getPredicatedSymbolicMaxBackedgeTakenCount(&L, Preds); + for (const auto *P : Preds) + addPredicate(*P); + } + return SymbolicMaxBackedgeCount; +} + void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) { if (Preds->implies(&Pred)) return; @@ -14964,30 +15024,6 @@ bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS, return false; } -const SCEV * -ScalarEvolution::computeSymbolicMaxBackedgeTakenCount(const Loop *L) { - SmallVector ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - - // Form an expression for the maximum exit count possible for this loop. We - // merge the max and exact information to approximate a version of - // getConstantMaxBackedgeTakenCount which isn't restricted to just constants. - SmallVector ExitCounts; - for (BasicBlock *ExitingBB : ExitingBlocks) { - const SCEV *ExitCount = - getExitCount(L, ExitingBB, ScalarEvolution::SymbolicMaximum); - if (!isa(ExitCount)) { - assert(DT.dominates(ExitingBB, L->getLoopLatch()) && - "We should only have known counts for exiting blocks that " - "dominate latch!"); - ExitCounts.push_back(ExitCount); - } - } - if (ExitCounts.empty()) - return getCouldNotCompute(); - return getUMinFromMismatchedTypes(ExitCounts, /*Sequential*/ true); -} - /// A rewriter to replace SCEV expressions in Map with the corresponding entry /// in the map. It skips AddRecExpr because we cannot guarantee that the /// replacement is loop invariant in the loop of the AddRec. diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 3baa8ede28ffa..08138a5e2f2d9 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -3140,6 +3140,10 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, return true; } + // Constant ptrauth can be null, iff the base pointer can be. 
+ if (auto *CPA = dyn_cast(V)) + return isKnownNonZero(CPA->getPointer(), DemandedElts, Q, Depth); + // A global variable in address space 0 is non null unless extern weak // or an absolute symbol reference. Other address spaces may have null as a // valid address for a global, so we can't assume anything. diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 20a1bd2957712..d3ab306904da1 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -710,6 +710,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(blockaddress); KEYWORD(dso_local_equivalent); KEYWORD(no_cfi); + KEYWORD(ptrauth); // Metadata types. KEYWORD(distinct); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 5d2056d208567..df0827996396e 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -4046,6 +4046,60 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) { ID.NoCFI = true; return false; } + case lltok::kw_ptrauth: { + // ValID ::= 'ptrauth' '(' ptr @foo ',' i32 + // (',' i64 (',' ptr addrdisc)? )? ')' + Lex.Lex(); + + Constant *Ptr, *Key; + Constant *Disc = nullptr, *AddrDisc = nullptr; + + if (parseToken(lltok::lparen, + "expected '(' in constant ptrauth expression") || + parseGlobalTypeAndValue(Ptr) || + parseToken(lltok::comma, + "expected comma in constant ptrauth expression") || + parseGlobalTypeAndValue(Key)) + return true; + // If present, parse the optional disc/addrdisc. 
+ if (EatIfPresent(lltok::comma)) + if (parseGlobalTypeAndValue(Disc) || + (EatIfPresent(lltok::comma) && parseGlobalTypeAndValue(AddrDisc))) + return true; + if (parseToken(lltok::rparen, + "expected ')' in constant ptrauth expression")) + return true; + + if (!Ptr->getType()->isPointerTy()) + return error(ID.Loc, "constant ptrauth base pointer must be a pointer"); + + auto *KeyC = dyn_cast(Key); + if (!KeyC || KeyC->getBitWidth() != 32) + return error(ID.Loc, "constant ptrauth key must be i32 constant"); + + ConstantInt *DiscC = nullptr; + if (Disc) { + DiscC = dyn_cast(Disc); + if (!DiscC || DiscC->getBitWidth() != 64) + return error( + ID.Loc, + "constant ptrauth integer discriminator must be i64 constant"); + } else { + DiscC = ConstantInt::get(Type::getInt64Ty(Context), 0); + } + + if (AddrDisc) { + if (!AddrDisc->getType()->isPointerTy()) + return error( + ID.Loc, "constant ptrauth address discriminator must be a pointer"); + } else { + AddrDisc = ConstantPointerNull::get(PointerType::get(Context, 0)); + } + + ID.ConstantVal = ConstantPtrAuth::get(Ptr, KeyC, DiscC, AddrDisc); + ID.Kind = ValID::t_Constant; + return false; + } case lltok::kw_trunc: case lltok::kw_bitcast: diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp index c085c715179ba..b7ed9cdf63145 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp @@ -222,6 +222,7 @@ GetCodeName(unsigned CodeID, unsigned BlockID, STRINGIFY_CODE(CST_CODE, CE_UNOP) STRINGIFY_CODE(CST_CODE, DSO_LOCAL_EQUIVALENT) STRINGIFY_CODE(CST_CODE, NO_CFI_VALUE) + STRINGIFY_CODE(CST_CODE, PTRAUTH) case bitc::CST_CODE_BLOCKADDRESS: return "CST_CODE_BLOCKADDRESS"; STRINGIFY_CODE(CST_CODE, DATA) diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 32b9a033173e9..aee627bbde0bf 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp 
@@ -517,7 +517,8 @@ class BitcodeConstant final : public Value, static constexpr uint8_t NoCFIOpcode = 252; static constexpr uint8_t DSOLocalEquivalentOpcode = 251; static constexpr uint8_t BlockAddressOpcode = 250; - static constexpr uint8_t FirstSpecialOpcode = BlockAddressOpcode; + static constexpr uint8_t ConstantPtrAuthOpcode = 249; + static constexpr uint8_t FirstSpecialOpcode = ConstantPtrAuthOpcode; // Separate struct to make passing different number of parameters to // BitcodeConstant::create() more convenient. @@ -1562,6 +1563,18 @@ Expected BitcodeReader::materializeValue(unsigned StartValID, C = ConstantExpr::get(BC->Opcode, ConstOps[0], ConstOps[1], BC->Flags); } else { switch (BC->Opcode) { + case BitcodeConstant::ConstantPtrAuthOpcode: { + auto *Key = dyn_cast(ConstOps[1]); + if (!Key) + return error("ptrauth key operand must be ConstantInt"); + + auto *Disc = dyn_cast(ConstOps[2]); + if (!Disc) + return error("ptrauth disc operand must be ConstantInt"); + + C = ConstantPtrAuth::get(ConstOps[0], Key, Disc, ConstOps[3]); + break; + } case BitcodeConstant::NoCFIOpcode: { auto *GV = dyn_cast(ConstOps[0]); if (!GV) @@ -3644,6 +3657,16 @@ Error BitcodeReader::parseConstants() { Record[1]); break; } + case bitc::CST_CODE_PTRAUTH: { + if (Record.size() < 4) + return error("Invalid ptrauth record"); + // Ptr, Key, Disc, AddrDisc + V = BitcodeConstant::create(Alloc, CurTy, + BitcodeConstant::ConstantPtrAuthOpcode, + {(unsigned)Record[0], (unsigned)Record[1], + (unsigned)Record[2], (unsigned)Record[3]}); + break; + } } assert(V->getType() == getTypeByID(CurTyID) && "Incorrect result type ID"); diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 3d653fe4458f4..046dad5721c4c 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -2848,6 +2848,12 @@ void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal, Code = bitc::CST_CODE_NO_CFI_VALUE; 
Record.push_back(VE.getTypeID(NC->getGlobalValue()->getType())); Record.push_back(VE.getValueID(NC->getGlobalValue())); + } else if (const auto *CPA = dyn_cast(C)) { + Code = bitc::CST_CODE_PTRAUTH; + Record.push_back(VE.getValueID(CPA->getPointer())); + Record.push_back(VE.getValueID(CPA->getKey())); + Record.push_back(VE.getValueID(CPA->getDiscriminator())); + Record.push_back(VE.getValueID(CPA->getAddrDiscriminator())); } else { #ifndef NDEBUG C->dump(); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index c04f7208c61f2..9208b096affad 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3972,7 +3972,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { // target can override this with custom lowering and calling the // implementation functions. LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (LI.isLegalOrCustom({G_UMIN, Ty}) && LI.isLegalOrCustom({G_UMAX, Ty})) + if (LI.isLegalOrCustom({G_UMIN, Ty})) return lowerAddSubSatToMinMax(MI); return lowerAddSubSatToAddoSubo(MI); } @@ -7977,27 +7977,51 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerBitreverse(MachineInstr &MI) { auto [Dst, Src] = MI.getFirst2Regs(); const LLT Ty = MRI.getType(Src); - unsigned Size = Ty.getSizeInBits(); + unsigned Size = Ty.getScalarSizeInBits(); + + if (Size >= 8) { + MachineInstrBuilder BSWAP = + MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src}); + + // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654 + // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4] + // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0] + MachineInstrBuilder Swap4 = + SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0))); + + // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76 + // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2] + // -> [(val & 0xCCCCCCCC) >> 2] & 
[(val << 2) & 0xCCCCCCCC] + MachineInstrBuilder Swap2 = + SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC))); + + // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 + // 6|7 + // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1] + // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA] + SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA))); + } else { + // Expand bitreverse for types smaller than 8 bits. + MachineInstrBuilder Tmp; + for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) { + MachineInstrBuilder Tmp2; + if (I < J) { + auto ShAmt = MIRBuilder.buildConstant(Ty, J - I); + Tmp2 = MIRBuilder.buildShl(Ty, Src, ShAmt); + } else { + auto ShAmt = MIRBuilder.buildConstant(Ty, I - J); + Tmp2 = MIRBuilder.buildLShr(Ty, Src, ShAmt); + } - MachineInstrBuilder BSWAP = - MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src}); - - // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654 - // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4] - // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0] - MachineInstrBuilder Swap4 = - SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0))); - - // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76 - // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2] - // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC] - MachineInstrBuilder Swap2 = - SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC))); - - // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7 - // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1] - // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA] - SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA))); + auto Mask = MIRBuilder.buildConstant(Ty, 1U << J); + Tmp2 = MIRBuilder.buildAnd(Ty, Tmp2, Mask); + if (I == 0) + Tmp = Tmp2; + else + Tmp = MIRBuilder.buildOr(Ty, Tmp, Tmp2); + } + MIRBuilder.buildCopy(Dst, Tmp); 
+ } MI.eraseFromParent(); return Legalized; diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 727a98c41bce4..86eb259c09015 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -1269,8 +1269,9 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI, Register DefReg = MI.getOperand(0).getReg(); if (DefReg.isVirtual() && all_of(MI.uses(), - [](const MachineOperand &UseOp) { - return !UseOp.isReg() || UseOp.getReg().isVirtual(); + [this](const MachineOperand &UseOp) { + return !UseOp.isReg() || UseOp.getReg().isVirtual() || + MRI->isConstantPhysReg(UseOp.getReg()); }) && IsLoopInvariantInst(MI, CurLoop) && any_of(MRI->use_nodbg_instructions(DefReg), diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 93d866384b482..42e861e61201c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10107,6 +10107,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (SDValue NewSHL = visitShiftByConstant(N)) return NewSHL; + // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the + // target. + if ((N1.getOpcode() == ISD::CTTZ || N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) && + N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, VT) && + TLI.isOperationLegalOrCustom(ISD::MUL, VT)) { + SDValue Y = N1.getOperand(0); + SDLoc DL(N); + SDValue NegY = DAG.getNegative(Y, DL, VT); + SDValue And = DAG.getNode(ISD::AND, DL, VT, Y, NegY); + return DAG.getNode(ISD::MUL, DL, VT, And, N0); + } + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); @@ -11186,17 +11198,19 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { return SDValue(); } -// FIXME: This should be checking for no signed zeros on individual operands, as -// well as no nans. 
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, - SDValue RHS, + SDValue RHS, const SDNodeFlags Flags, const TargetLowering &TLI) { - const TargetOptions &Options = DAG.getTarget().Options; EVT VT = LHS.getValueType(); + if (!VT.isFloatingPoint()) + return false; + + const TargetOptions &Options = DAG.getTarget().Options; - return Options.NoSignedZerosFPMath && VT.isFloatingPoint() && + return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) && TLI.isProfitableToCombineMinNumMaxNum(VT) && - DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS); + (Flags.hasNoNaNs() || + (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS))); } static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, @@ -11674,7 +11688,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // select (fcmp gt x, y), x, y -> fmaxnum x, y // // This is OK if we don't care what happens if either operand is a NaN. - if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI)) + if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI)) if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC)) return FMinMax; @@ -12267,7 +12281,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { // This is OK if we don't care about what happens if either operand is a // NaN. 
// - if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) { + if (N0.hasOneUse() && + isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) { if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC)) return FMinMax; } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index fc96ecdc66280..fb1424f75e097 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -2488,6 +2488,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMINIMUM: case ISD::FMAXNUM: case ISD::FMINNUM: + case ISD::FMAXNUM_IEEE: + case ISD::FMINNUM_IEEE: case ISD::FMUL: case ISD::FPOW: case ISD::FREM: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 8fda35f008632..12f1d005249d6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -646,18 +646,21 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { } } - // Zero extend to the promoted type and do the count there. - SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - // Subtract off the extra leading bits in the bigger type. SDValue ExtractLeadingBits = DAG.getConstant( NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, NVT); - if (!N->isVPOpcode()) + if (!N->isVPOpcode()) { + // Zero extend to the promoted type and do the count there. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); return DAG.getNode(ISD::SUB, dl, NVT, DAG.getNode(N->getOpcode(), dl, NVT, Op), ExtractLeadingBits); + } + SDValue Mask = N->getOperand(1); SDValue EVL = N->getOperand(2); + // Zero extend to the promoted type and do the count there. 
+ SDValue Op = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL); return DAG.getNode(ISD::VP_SUB, dl, NVT, DAG.getNode(N->getOpcode(), dl, NVT, Op, Mask, EVL), ExtractLeadingBits, Mask, EVL); @@ -681,11 +684,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) { } // Zero extend to the promoted type and do the count or parity there. - SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - if (!N->isVPOpcode()) + if (!N->isVPOpcode()) { + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op); - return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op, - N->getOperand(1), N->getOperand(2)); + } + + SDValue Mask = N->getOperand(1); + SDValue EVL = N->getOperand(2); + SDValue Op = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL); + return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op, Mask, + EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { @@ -1335,12 +1343,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FFREXP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); - if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) - RHS = ZExtPromotedInteger(RHS); - if (N->getOpcode() != ISD::VP_SHL) + if (N->getOpcode() != ISD::VP_SHL) { + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = ZExtPromotedInteger(RHS); + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + } + + SDValue Mask = N->getOperand(2); + SDValue EVL = N->getOperand(3); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = VPZExtPromotedInteger(RHS, Mask, EVL); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, - N->getOperand(2), N->getOperand(3)); + Mask, EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { @@ -1364,27 
+1379,39 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) { - // Sign extend the input. - SDValue LHS = SExtPromotedInteger(N->getOperand(0)); - SDValue RHS = SExtPromotedInteger(N->getOperand(1)); - if (N->getNumOperands() == 2) + if (N->getNumOperands() == 2) { + // Sign extend the input. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + } assert(N->getNumOperands() == 4 && "Unexpected number of operands!"); assert(N->isVPOpcode() && "Expected VP opcode"); + SDValue Mask = N->getOperand(2); + SDValue EVL = N->getOperand(3); + // Sign extend the input. + SDValue LHS = VPSExtPromotedInteger(N->getOperand(0), Mask, EVL); + SDValue RHS = VPSExtPromotedInteger(N->getOperand(1), Mask, EVL); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, - N->getOperand(2), N->getOperand(3)); + Mask, EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) { - // Zero extend the input. - SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); - SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); - if (N->getNumOperands() == 2) + if (N->getNumOperands() == 2) { + // Zero extend the input. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + } assert(N->getNumOperands() == 4 && "Unexpected number of operands!"); assert(N->isVPOpcode() && "Expected VP opcode"); + // Zero extend the input. 
+ SDValue Mask = N->getOperand(2); + SDValue EVL = N->getOperand(3); + SDValue LHS = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL); + SDValue RHS = VPZExtPromotedInteger(N->getOperand(1), Mask, EVL); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, - N->getOperand(2), N->getOperand(3)); + Mask, EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) { @@ -1400,27 +1427,43 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { - // The input value must be properly sign extended. - SDValue LHS = SExtPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); - if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) - RHS = ZExtPromotedInteger(RHS); - if (N->getOpcode() != ISD::VP_SRA) + if (N->getOpcode() != ISD::VP_SRA) { + // The input value must be properly sign extended. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = ZExtPromotedInteger(RHS); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + } + + SDValue Mask = N->getOperand(2); + SDValue EVL = N->getOperand(3); + // The input value must be properly sign extended. + SDValue LHS = VPSExtPromotedInteger(N->getOperand(0), Mask, EVL); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = VPZExtPromotedInteger(RHS, Mask, EVL); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, - N->getOperand(2), N->getOperand(3)); + Mask, EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { - // The input value must be properly zero extended. 
- SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); - if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) - RHS = ZExtPromotedInteger(RHS); - if (N->getOpcode() != ISD::VP_SRL) + if (N->getOpcode() != ISD::VP_SRL) { + // The input value must be properly zero extended. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = ZExtPromotedInteger(RHS); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + } + + SDValue Mask = N->getOperand(2); + SDValue EVL = N->getOperand(3); + // The input value must be properly zero extended. + SDValue LHS = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = VPZExtPromotedInteger(RHS, Mask, EVL); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, - N->getOperand(2), N->getOperand(3)); + Mask, EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) { @@ -1487,7 +1530,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VPFunnelShift(SDNode *N) { SDValue Mask = N->getOperand(3); SDValue EVL = N->getOperand(4); if (getTypeAction(Amt.getValueType()) == TargetLowering::TypePromoteInteger) - Amt = ZExtPromotedInteger(Amt); + Amt = VPZExtPromotedInteger(Amt, Mask, EVL); EVT AmtVT = Amt.getValueType(); SDLoc DL(N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index d925089d5689f..ba3c7582d5a8a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -275,6 +275,27 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { return DAG.getZeroExtendInReg(Op, dl, OldVT); } + /// Get a promoted operand and sign extend it to the final size. 
+ SDValue VPSExtPromotedInteger(SDValue Op, SDValue Mask, SDValue EVL) { + EVT OldVT = Op.getValueType(); + SDLoc dl(Op); + Op = GetPromotedInteger(Op); + // FIXME: Add VP_SIGN_EXTEND_INREG. + EVT VT = Op.getValueType(); + unsigned BitsDiff = VT.getScalarSizeInBits() - OldVT.getScalarSizeInBits(); + SDValue ShiftCst = DAG.getShiftAmountConstant(BitsDiff, VT, dl); + SDValue Shl = DAG.getNode(ISD::VP_SHL, dl, VT, Op, ShiftCst, Mask, EVL); + return DAG.getNode(ISD::VP_SRA, dl, VT, Shl, ShiftCst, Mask, EVL); + } + + /// Get a promoted operand and zero extend it to the final size. + SDValue VPZExtPromotedInteger(SDValue Op, SDValue Mask, SDValue EVL) { + EVT OldVT = Op.getValueType(); + SDLoc dl(Op); + Op = GetPromotedInteger(Op); + return DAG.getVPZeroExtendInReg(Op, Mask, EVL, dl, OldVT); + } + // Promote the given operand V (vector or scalar) according to N's specific // reduction kind. N must be an integer VECREDUCE_* or VP_REDUCE_*. Returns // the nominal extension opcode (ISD::(ANY|ZERO|SIGN)_EXTEND) and the diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 40e621f0db220..361416edb554c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1174,8 +1174,12 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FADD: case ISD::VP_FADD: case ISD::FSUB: case ISD::VP_FSUB: case ISD::FMUL: case ISD::VP_FMUL: - case ISD::FMINNUM: case ISD::VP_FMINNUM: - case ISD::FMAXNUM: case ISD::VP_FMAXNUM: + case ISD::FMINNUM: + case ISD::FMINNUM_IEEE: + case ISD::VP_FMINNUM: + case ISD::FMAXNUM: + case ISD::FMAXNUM_IEEE: + case ISD::VP_FMAXNUM: case ISD::FMINIMUM: case ISD::VP_FMINIMUM: case ISD::FMAXIMUM: @@ -4237,8 +4241,12 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::SHL: case ISD::VP_SHL: case ISD::SRA: case ISD::VP_SRA: case ISD::SRL: case ISD::VP_SRL: - case 
ISD::FMINNUM: case ISD::VP_FMINNUM: - case ISD::FMAXNUM: case ISD::VP_FMAXNUM: + case ISD::FMINNUM: + case ISD::FMINNUM_IEEE: + case ISD::VP_FMINNUM: + case ISD::FMAXNUM: + case ISD::FMAXNUM_IEEE: + case ISD::VP_FMAXNUM: case ISD::FMINIMUM: case ISD::VP_FMINIMUM: case ISD::FMAXIMUM: diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 4e47f50ee4289..623b6343994a4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8428,6 +8428,7 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N, EVT VT = N->getValueType(0); EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); bool IsMax = Opc == ISD::FMAXIMUM; + SDNodeFlags Flags = N->getFlags(); if (VT.isVector() && isOperationLegalOrCustomOrPromote(Opc, VT.getScalarType())) @@ -8444,15 +8445,15 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N, bool MinMaxMustRespectOrderedZero = false; if (isOperationLegalOrCustom(CompOpcIeee, VT)) { - MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS); + MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags); MinMaxMustRespectOrderedZero = true; } else if (isOperationLegalOrCustom(CompOpc, VT)) { - MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS); + MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags); } else { // NaN (if exists) will be propagated later, so orderness doesn't matter. SDValue Compare = DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? 
ISD::SETGT : ISD::SETLT); - MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS); + MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags); } // Propagate any NaN of both operands @@ -8461,7 +8462,7 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N, ConstantFP *FPNaN = ConstantFP::get( *DAG.getContext(), APFloat::getNaN(DAG.EVTToAPFloatSemantics(VT))); MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO), - DAG.getConstantFP(*FPNaN, DL, VT), MinMax); + DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags); } // fminimum/fmaximum requires -0.0 less than +0.0 @@ -8473,11 +8474,11 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N, DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32); SDValue LCmp = DAG.getSelect( DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS, - MinMax); + MinMax, Flags); SDValue RCmp = DAG.getSelect( DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, - LCmp); - MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax); + LCmp, Flags); + MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags); } return MinMax; diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp index 3d5c58d282da5..df1c02c3dc67c 100644 --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -181,6 +181,7 @@ std::string EVT::getEVTString() const { case MVT::Metadata: return "Metadata"; case MVT::Untyped: return "Untyped"; case MVT::funcref: return "funcref"; + case MVT::exnref: return "exnref"; case MVT::externref: return "externref"; case MVT::aarch64svcount: return "aarch64svcount"; diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp index 1a9e1ba869c31..16c1dcb1e1175 100644 --- a/llvm/lib/CodeGen/WasmEHPrepare.cpp +++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp @@ -252,12 +252,11 @@ bool WasmEHPrepareImpl::prepareEHPads(Function &F) { M.getOrInsertGlobal("__wasm_lpad_context", LPadContextTy)); 
LPadContextGV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel); - LPadIndexField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 0, - "lpad_index_gep"); - LSDAField = - IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 1, "lsda_gep"); - SelectorField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 2, - "selector_gep"); + LPadIndexField = LPadContextGV; + LSDAField = IRB.CreateConstInBoundsGEP2_32(LPadContextTy, LPadContextGV, 0, 1, + "lsda_gep"); + SelectorField = IRB.CreateConstInBoundsGEP2_32(LPadContextTy, LPadContextGV, + 0, 2, "selector_gep"); // wasm.landingpad.index() intrinsic, which is to specify landingpad index LPadIndexF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_landingpad_index); diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index eaf8c35142def..0046220611203 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -1129,7 +1129,8 @@ uint32_t RuntimeDyldELF::getMatchingLoRelocation(uint32_t RelType, bool RuntimeDyldELF::resolveAArch64ShortBranch( unsigned SectionID, relocation_iterator RelI, const RelocationValueRef &Value) { - uint64_t Address; + uint64_t TargetOffset; + unsigned TargetSectionID; if (Value.SymbolName) { auto Loc = GlobalSymbolTable.find(Value.SymbolName); @@ -1138,23 +1139,32 @@ bool RuntimeDyldELF::resolveAArch64ShortBranch( return false; const auto &SymInfo = Loc->second; - Address = - uint64_t(Sections[SymInfo.getSectionID()].getLoadAddressWithOffset( - SymInfo.getOffset())); + + TargetSectionID = SymInfo.getSectionID(); + TargetOffset = SymInfo.getOffset(); } else { - Address = uint64_t(Sections[Value.SectionID].getLoadAddress()); + TargetSectionID = Value.SectionID; + TargetOffset = 0; } - uint64_t Offset = RelI->getOffset(); - uint64_t SourceAddress = Sections[SectionID].getLoadAddressWithOffset(Offset); + + // We don't actually know 
the load addresses at this point, so if the + // branch is cross-section, we don't know exactly how far away it is. + if (TargetSectionID != SectionID) + return false; + + uint64_t SourceOffset = RelI->getOffset(); // R_AARCH64_CALL26 requires immediate to be in range -2^27 <= imm < 2^27 // If distance between source and target is out of range then we should // create thunk. - if (!isInt<28>(Address + Value.Addend - SourceAddress)) + if (!isInt<28>(TargetOffset + Value.Addend - SourceOffset)) return false; - resolveRelocation(Sections[SectionID], Offset, Address, RelI->getType(), - Value.Addend); + RelocationEntry RE(SectionID, SourceOffset, RelI->getType(), Value.Addend); + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); return true; } diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index ced5d78f994ab..8b1a21f962b08 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -1594,6 +1594,27 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, return; } + if (const ConstantPtrAuth *CPA = dyn_cast(CV)) { + Out << "ptrauth ("; + + // ptrauth (ptr CST, i32 KEY[, i64 DISC[, ptr ADDRDISC]?]?) 
+ unsigned NumOpsToWrite = 2; + if (!CPA->getOperand(2)->isNullValue()) + NumOpsToWrite = 3; + if (!CPA->getOperand(3)->isNullValue()) + NumOpsToWrite = 4; + + ListSeparator LS; + for (unsigned i = 0, e = NumOpsToWrite; i != e; ++i) { + Out << LS; + WriterCtx.TypePrinter->print(CPA->getOperand(i)->getType(), Out); + Out << ' '; + WriteAsOperandInternal(Out, CPA->getOperand(i), WriterCtx); + } + Out << ')'; + return; + } + if (const ConstantArray *CA = dyn_cast(CV)) { Type *ETy = CA->getType()->getElementType(); Out << '['; diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index cfb89d557db47..119fcb4fa0346 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -550,6 +550,9 @@ void llvm::deleteConstant(Constant *C) { case Constant::NoCFIValueVal: delete static_cast(C); break; + case Constant::ConstantPtrAuthVal: + delete static_cast(C); + break; case Constant::UndefValueVal: delete static_cast(C); break; @@ -2015,6 +2018,124 @@ Value *NoCFIValue::handleOperandChangeImpl(Value *From, Value *To) { return nullptr; } +//---- ConstantPtrAuth::get() implementations. 
+// + +ConstantPtrAuth *ConstantPtrAuth::get(Constant *Ptr, ConstantInt *Key, + ConstantInt *Disc, Constant *AddrDisc) { + Constant *ArgVec[] = {Ptr, Key, Disc, AddrDisc}; + ConstantPtrAuthKeyType MapKey(ArgVec); + LLVMContextImpl *pImpl = Ptr->getContext().pImpl; + return pImpl->ConstantPtrAuths.getOrCreate(Ptr->getType(), MapKey); +} + +ConstantPtrAuth *ConstantPtrAuth::getWithSameSchema(Constant *Pointer) const { + return get(Pointer, getKey(), getDiscriminator(), getAddrDiscriminator()); +} + +ConstantPtrAuth::ConstantPtrAuth(Constant *Ptr, ConstantInt *Key, + ConstantInt *Disc, Constant *AddrDisc) + : Constant(Ptr->getType(), Value::ConstantPtrAuthVal, &Op<0>(), 4) { + assert(Ptr->getType()->isPointerTy()); + assert(Key->getBitWidth() == 32); + assert(Disc->getBitWidth() == 64); + assert(AddrDisc->getType()->isPointerTy()); + setOperand(0, Ptr); + setOperand(1, Key); + setOperand(2, Disc); + setOperand(3, AddrDisc); +} + +/// Remove the constant from the constant table. +void ConstantPtrAuth::destroyConstantImpl() { + getType()->getContext().pImpl->ConstantPtrAuths.remove(this); +} + +Value *ConstantPtrAuth::handleOperandChangeImpl(Value *From, Value *ToV) { + assert(isa(ToV) && "Cannot make Constant refer to non-constant!"); + Constant *To = cast(ToV); + + SmallVector Values; + Values.reserve(getNumOperands()); + + unsigned NumUpdated = 0; + + Use *OperandList = getOperandList(); + unsigned OperandNo = 0; + for (Use *O = OperandList, *E = OperandList + getNumOperands(); O != E; ++O) { + Constant *Val = cast(O->get()); + if (Val == From) { + OperandNo = (O - OperandList); + Val = To; + ++NumUpdated; + } + Values.push_back(Val); + } + + return getContext().pImpl->ConstantPtrAuths.replaceOperandsInPlace( + Values, this, From, To, NumUpdated, OperandNo); +} + +bool ConstantPtrAuth::isKnownCompatibleWith(const Value *Key, + const Value *Discriminator, + const DataLayout &DL) const { + // If the keys are different, there's no chance for this to be compatible. 
+ if (getKey() != Key) + return false; + + // We can have 3 kinds of discriminators: + // - simple, integer-only: `i64 x, ptr null` vs. `i64 x` + // - address-only: `i64 0, ptr p` vs. `ptr p` + // - blended address/integer: `i64 x, ptr p` vs. `@llvm.ptrauth.blend(p, x)` + + // If this constant has a simple discriminator (integer, no address), easy: + // it's compatible iff the provided full discriminator is also a simple + // discriminator, identical to our integer discriminator. + if (!hasAddressDiscriminator()) + return getDiscriminator() == Discriminator; + + // Otherwise, we can isolate address and integer discriminator components. + const Value *AddrDiscriminator = nullptr; + + // This constant may or may not have an integer discriminator (instead of 0). + if (!getDiscriminator()->isNullValue()) { + // If it does, there's an implicit blend. We need to have a matching blend + // intrinsic in the provided full discriminator. + if (!match(Discriminator, + m_Intrinsic( + m_Value(AddrDiscriminator), m_Specific(getDiscriminator())))) + return false; + } else { + // Otherwise, interpret the provided full discriminator as address-only. + AddrDiscriminator = Discriminator; + } + + // Either way, we can now focus on comparing the address discriminators. + + // Discriminators are i64, so the provided addr disc may be a ptrtoint. + if (auto *Cast = dyn_cast(AddrDiscriminator)) + AddrDiscriminator = Cast->getPointerOperand(); + + // Beyond that, we're only interested in compatible pointers. + if (getAddrDiscriminator()->getType() != AddrDiscriminator->getType()) + return false; + + // These are often the same constant GEP, making them trivially equivalent. + if (getAddrDiscriminator() == AddrDiscriminator) + return true; + + // Finally, they may be equivalent base+offset expressions. 
+ APInt Off1(DL.getIndexTypeSizeInBits(getAddrDiscriminator()->getType()), 0); + auto *Base1 = getAddrDiscriminator()->stripAndAccumulateConstantOffsets( + DL, Off1, /*AllowNonInbounds=*/true); + + APInt Off2(DL.getIndexTypeSizeInBits(AddrDiscriminator->getType()), 0); + auto *Base2 = AddrDiscriminator->stripAndAccumulateConstantOffsets( + DL, Off2, /*AllowNonInbounds=*/true); + + return Base1 == Base2 && Off1 == Off2; +} + //---- ConstantExpr::get() implementations. // diff --git a/llvm/lib/IR/ConstantsContext.h b/llvm/lib/IR/ConstantsContext.h index 7067d0d121117..5153880b5cab6 100644 --- a/llvm/lib/IR/ConstantsContext.h +++ b/llvm/lib/IR/ConstantsContext.h @@ -23,6 +23,7 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -286,6 +287,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CompareConstantExpr, Value) template struct ConstantAggrKeyType; struct InlineAsmKeyType; struct ConstantExprKeyType; +struct ConstantPtrAuthKeyType; template struct ConstantInfo; template <> struct ConstantInfo { @@ -308,6 +310,10 @@ template <> struct ConstantInfo { using ValType = ConstantAggrKeyType; using TypeClass = VectorType; }; +template <> struct ConstantInfo { + using ValType = ConstantPtrAuthKeyType; + using TypeClass = Type; +}; template struct ConstantAggrKeyType { ArrayRef Operands; @@ -536,6 +542,47 @@ struct ConstantExprKeyType { } }; +struct ConstantPtrAuthKeyType { + ArrayRef Operands; + + ConstantPtrAuthKeyType(ArrayRef Operands) : Operands(Operands) {} + + ConstantPtrAuthKeyType(ArrayRef Operands, const ConstantPtrAuth *) + : Operands(Operands) {} + + ConstantPtrAuthKeyType(const ConstantPtrAuth *C, + SmallVectorImpl &Storage) { + assert(Storage.empty() && "Expected empty storage"); + for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) + Storage.push_back(cast(C->getOperand(I))); + 
Operands = Storage; + } + + bool operator==(const ConstantPtrAuthKeyType &X) const { + return Operands == X.Operands; + } + + bool operator==(const ConstantPtrAuth *C) const { + if (Operands.size() != C->getNumOperands()) + return false; + for (unsigned I = 0, E = Operands.size(); I != E; ++I) + if (Operands[I] != C->getOperand(I)) + return false; + return true; + } + + unsigned getHash() const { + return hash_combine_range(Operands.begin(), Operands.end()); + } + + using TypeClass = typename ConstantInfo::TypeClass; + + ConstantPtrAuth *create(TypeClass *Ty) const { + return new ConstantPtrAuth(Operands[0], cast(Operands[1]), + cast(Operands[2]), Operands[3]); + } +}; + // Free memory for a given constant. Assumes the constant has already been // removed from all relevant maps. void deleteConstant(Constant *C); diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index bd06ff82a15a5..13fa1afeaaff2 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -79,7 +79,7 @@ using ProfileCount = Function::ProfileCount; // are not in the public header file... 
template class llvm::SymbolTableListTraits; -static cl::opt NonGlobalValueMaxNameSize( +static cl::opt NonGlobalValueMaxNameSize( "non-global-value-max-name-size", cl::Hidden, cl::init(1024), cl::desc("Maximum size for the name of non-global values.")); diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index 399fe0dad26c7..392e0d16f1761 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -1562,6 +1562,8 @@ class LLVMContextImpl { DenseMap NoCFIValues; + ConstantUniqueMap ConstantPtrAuths; + ConstantUniqueMap ExprConstants; ConstantUniqueMap InlineAsms; diff --git a/llvm/lib/IR/ValueSymbolTable.cpp b/llvm/lib/IR/ValueSymbolTable.cpp index 52f7ddcdc65a2..a020acf22a96c 100644 --- a/llvm/lib/IR/ValueSymbolTable.cpp +++ b/llvm/lib/IR/ValueSymbolTable.cpp @@ -43,23 +43,34 @@ ValueSymbolTable::~ValueSymbolTable() { ValueName *ValueSymbolTable::makeUniqueName(Value *V, SmallString<256> &UniqueName) { unsigned BaseSize = UniqueName.size(); + bool AppenDot = false; + if (auto *GV = dyn_cast(V)) { + // A dot is appended to mark it as clone during ABI demangling so that + // for example "_Z1fv" and "_Z1fv.1" both demangle to "f()", the second + // one being a clone. + // On NVPTX we cannot use a dot because PTX only allows [A-Za-z0-9_$] for + // identifiers. This breaks ABI demangling but at least ptxas accepts and + // compiles the program. + const Module *M = GV->getParent(); + if (!(M && Triple(M->getTargetTriple()).isNVPTX())) + AppenDot = true; + } + while (true) { // Trim any suffix off and append the next number. UniqueName.resize(BaseSize); raw_svector_ostream S(UniqueName); - if (auto *GV = dyn_cast(V)) { - // A dot is appended to mark it as clone during ABI demangling so that - // for example "_Z1fv" and "_Z1fv.1" both demangle to "f()", the second - // one being a clone. - // On NVPTX we cannot use a dot because PTX only allows [A-Za-z0-9_$] for - // identifiers. 
This breaks ABI demangling but at least ptxas accepts and - // compiles the program. - const Module *M = GV->getParent(); - if (!(M && Triple(M->getTargetTriple()).isNVPTX())) - S << "."; - } + if (AppenDot) + S << "."; S << ++LastUnique; + // Retry if MaxNameSize has been exceeded. + if (MaxNameSize > -1 && UniqueName.size() > (size_t)MaxNameSize) { + assert(BaseSize >= UniqueName.size() - (size_t)MaxNameSize && + "Can't generate unique name: MaxNameSize is too small."); + BaseSize -= UniqueName.size() - (size_t)MaxNameSize; + continue; + } // Try insert the vmap entry with this suffix. auto IterBool = vmap.insert(std::make_pair(UniqueName.str(), V)); if (IterBool.second) diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 50f8d6ec84201..684e54444621b 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -629,6 +629,7 @@ class Verifier : public InstVisitor, VerifierSupport { void visitConstantExprsRecursively(const Constant *EntryC); void visitConstantExpr(const ConstantExpr *CE); + void visitConstantPtrAuth(const ConstantPtrAuth *CPA); void verifyInlineAsmCall(const CallBase &Call); void verifyStatepoint(const CallBase &Call); void verifyFrameRecoverIndices(); @@ -2422,6 +2423,9 @@ void Verifier::visitConstantExprsRecursively(const Constant *EntryC) { if (const auto *CE = dyn_cast(C)) visitConstantExpr(CE); + if (const auto *CPA = dyn_cast(C)) + visitConstantPtrAuth(CPA); + if (const auto *GV = dyn_cast(C)) { // Global Values get visited separately, but we do need to make sure // that the global value is in the correct module @@ -2449,6 +2453,23 @@ void Verifier::visitConstantExpr(const ConstantExpr *CE) { "Invalid bitcast", CE); } +void Verifier::visitConstantPtrAuth(const ConstantPtrAuth *CPA) { + Check(CPA->getPointer()->getType()->isPointerTy(), + "signed ptrauth constant base pointer must have pointer type"); + + Check(CPA->getType() == CPA->getPointer()->getType(), + "signed ptrauth constant must have same type as its 
base pointer"); + + Check(CPA->getKey()->getBitWidth() == 32, + "signed ptrauth constant key must be i32 constant integer"); + + Check(CPA->getAddrDiscriminator()->getType()->isPointerTy(), + "signed ptrauth constant address discriminator must be a pointer"); + + Check(CPA->getDiscriminator()->getBitWidth() == 64, + "signed ptrauth constant discriminator must be i64 constant integer"); +} + bool Verifier::verifyAttributeCount(AttributeList Attrs, unsigned Params) { // There shouldn't be more attribute sets than there are parameters plus the // function and return value. @@ -5090,6 +5111,8 @@ void Verifier::visitInstruction(Instruction &I) { } else if (isa(I.getOperand(i))) { Check(CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i), "Cannot take the address of an inline asm!", &I); + } else if (auto *CPA = dyn_cast(I.getOperand(i))) { + visitConstantExprsRecursively(CPA); } else if (ConstantExpr *CE = dyn_cast(I.getOperand(i))) { if (CE->getType()->isPtrOrPtrVectorTy()) { // If we have a ConstantExpr pointer, we need to see if it came from an diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 2cddaf330b3bc..8014ef9d03948 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -2580,7 +2580,7 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, MCAsmMacro &Macro, OS << NumOfMacroInstantiations; Pos += 2; } else if (Argument == "+") { - OS << Macro.Count++; + OS << Macro.Count; Pos += 2; } else { for (; Index < NParameters; ++Index) @@ -2629,6 +2629,7 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, MCAsmMacro &Macro, Body = Body.substr(Pos); } + ++Macro.Count; return false; } diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp index 6507a0e5950eb..23381955c60a8 100644 --- a/llvm/lib/Object/WasmObjectFile.cpp +++ b/llvm/lib/Object/WasmObjectFile.cpp @@ -177,8 +177,8 @@ static uint8_t readOpcode(WasmObjectFile::ReadContext &Ctx) { static 
wasm::ValType parseValType(WasmObjectFile::ReadContext &Ctx, uint32_t Code) { - // only directly encoded FUNCREF/EXTERNREF are supported - // (not ref null func or ref null extern) + // only directly encoded FUNCREF/EXTERNREF/EXNREF are supported + // (not ref null func, ref null extern, or ref null exn) switch (Code) { case wasm::WASM_TYPE_I32: case wasm::WASM_TYPE_I64: @@ -187,6 +187,7 @@ static wasm::ValType parseValType(WasmObjectFile::ReadContext &Ctx, case wasm::WASM_TYPE_V128: case wasm::WASM_TYPE_FUNCREF: case wasm::WASM_TYPE_EXTERNREF: + case wasm::WASM_TYPE_EXNREF: return wasm::ValType(Code); } if (Code == wasm::WASM_TYPE_NULLABLE || Code == wasm::WASM_TYPE_NONNULLABLE) { @@ -1288,6 +1289,7 @@ Error WasmObjectFile::parseImportSection(ReadContext &Ctx) { auto ElemType = Im.Table.ElemType; if (ElemType != wasm::ValType::FUNCREF && ElemType != wasm::ValType::EXTERNREF && + ElemType != wasm::ValType::EXNREF && ElemType != wasm::ValType::OTHERREF) return make_error("invalid table element type", object_error::parse_failed); @@ -1346,6 +1348,7 @@ Error WasmObjectFile::parseTableSection(ReadContext &Ctx) { auto ElemType = Tables.back().Type.ElemType; if (ElemType != wasm::ValType::FUNCREF && ElemType != wasm::ValType::EXTERNREF && + ElemType != wasm::ValType::EXNREF && ElemType != wasm::ValType::OTHERREF) { return make_error("invalid table element type", object_error::parse_failed); @@ -1680,6 +1683,7 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) { Segment.ElemKind = parseValType(Ctx, ElemKind); if (Segment.ElemKind != wasm::ValType::FUNCREF && Segment.ElemKind != wasm::ValType::EXTERNREF && + Segment.ElemKind != wasm::ValType::EXNREF && Segment.ElemKind != wasm::ValType::OTHERREF) { return make_error("invalid elem type", object_error::parse_failed); diff --git a/llvm/lib/ObjectYAML/WasmYAML.cpp b/llvm/lib/ObjectYAML/WasmYAML.cpp index 544a91d03dce0..7ad338f65706d 100644 --- a/llvm/lib/ObjectYAML/WasmYAML.cpp +++ b/llvm/lib/ObjectYAML/WasmYAML.cpp 
@@ -606,6 +606,7 @@ void ScalarEnumerationTraits::enumeration( ECase(V128); ECase(FUNCREF); ECase(EXTERNREF); + ECase(EXNREF); ECase(OTHERREF); #undef ECase } @@ -640,6 +641,7 @@ void ScalarEnumerationTraits::enumeration( #define ECase(X) IO.enumCase(Type, #X, CONCAT(X)); ECase(FUNCREF); ECase(EXTERNREF); + ECase(EXNREF); ECase(OTHERREF); #undef ECase } diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 836206a4fd86e..798236c295194 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -1212,7 +1212,8 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start, const uint64_t FirstWord = support::endian::readNext(Ptr); - if (FirstWord == memprof::Version1 || FirstWord == memprof::Version2) { + if (FirstWord == memprof::Version1 || FirstWord == memprof::Version2 || + FirstWord == memprof::Version3) { // Everything is good. We can proceed to deserialize the rest. Version = static_cast(FirstWord); } else if (FirstWord >= 24) { @@ -1559,6 +1560,7 @@ IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const { "MemProfCallStackTable must not be available"); return getMemProfRecordV0(IndexedRecord, *MemProfFrameTable); case memprof::Version2: + case memprof::Version3: assert(MemProfFrameTable && "MemProfFrameTable must be available"); assert(MemProfCallStackTable && "MemProfCallStackTable must be available"); return getMemProfRecordV2(IndexedRecord, *MemProfFrameTable, diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index b67a9700b680a..b16714ae8b9a2 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -617,6 +617,56 @@ static Error writeMemProfV2(ProfOStream &OS, return Error::success(); } +// Write out MemProf Version3 as follows: +// uint64_t Version +// uint64_t RecordTableOffset = RecordTableGenerator.Emit +// uint64_t FramePayloadOffset = 
Offset for the frame payload +// uint64_t FrameTableOffset = FrameTableGenerator.Emit +// uint64_t CallStackPayloadOffset = Offset for the call stack payload +// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit +// uint64_t Num schema entries +// uint64_t Schema entry 0 +// uint64_t Schema entry 1 +// .... +// uint64_t Schema entry N - 1 +// OnDiskChainedHashTable MemProfRecordData +// OnDiskChainedHashTable MemProfFrameData +// OnDiskChainedHashTable MemProfCallStackData +static Error writeMemProfV3(ProfOStream &OS, + memprof::IndexedMemProfData &MemProfData, + bool MemProfFullSchema) { + OS.write(memprof::Version3); + uint64_t HeaderUpdatePos = OS.tell(); + OS.write(0ULL); // Reserve space for the memprof record table offset. + OS.write(0ULL); // Reserve space for the memprof frame payload offset. + OS.write(0ULL); // Reserve space for the memprof frame table offset. + OS.write(0ULL); // Reserve space for the memprof call stack payload offset. + OS.write(0ULL); // Reserve space for the memprof call stack table offset. + + auto Schema = memprof::getHotColdSchema(); + if (MemProfFullSchema) + Schema = memprof::getFullSchema(); + writeMemProfSchema(OS, Schema); + + uint64_t RecordTableOffset = writeMemProfRecords(OS, MemProfData.RecordData, + &Schema, memprof::Version3); + + uint64_t FramePayloadOffset = OS.tell(); + uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.FrameData); + + uint64_t CallStackPayloadOffset = OS.tell(); + uint64_t CallStackTableOffset = + writeMemProfCallStacks(OS, MemProfData.CallStackData); + + uint64_t Header[] = { + RecordTableOffset, FramePayloadOffset, FrameTableOffset, + CallStackPayloadOffset, CallStackTableOffset, + }; + OS.patch({{HeaderUpdatePos, Header, std::size(Header)}}); + + return Error::success(); +} + // Write out the MemProf data in a requested version. 
static Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, @@ -629,6 +679,8 @@ static Error writeMemProf(ProfOStream &OS, return writeMemProfV1(OS, MemProfData); case memprof::Version2: return writeMemProfV2(OS, MemProfData, MemProfFullSchema); + case memprof::Version3: + return writeMemProfV3(OS, MemProfData, MemProfFullSchema); } return make_error( diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index e5608644519db..2f0e53736c82e 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -52,6 +52,7 @@ size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema, case Version1: return serializedSizeV0(*this, Schema); case Version2: + case Version3: return serializedSizeV2(*this, Schema); } llvm_unreachable("unsupported MemProf version"); @@ -95,6 +96,7 @@ size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema, case Version1: return serializedSizeV0(*this, Schema); case Version2: + case Version3: return serializedSizeV2(*this, Schema); } llvm_unreachable("unsupported MemProf version"); @@ -149,6 +151,7 @@ void IndexedMemProfRecord::serialize(const MemProfSchema &Schema, serializeV0(*this, Schema, OS); return; case Version2: + case Version3: serializeV2(*this, Schema, OS); return; } @@ -239,14 +242,15 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, case Version1: return deserializeV0(Schema, Ptr); case Version2: + case Version3: return deserializeV2(Schema, Ptr); } llvm_unreachable("unsupported MemProf version"); } MemProfRecord IndexedMemProfRecord::toMemProfRecord( - llvm::function_ref(const CallStackId)> - Callback) const { + llvm::function_ref(const CallStackId)> Callback) + const { MemProfRecord Record; Record.AllocSites.reserve(AllocSites.size()); diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index 2a9b3903720be..283fcc153b33a 100644 --- a/llvm/lib/Support/APFloat.cpp +++ 
b/llvm/lib/Support/APFloat.cpp @@ -3665,6 +3665,15 @@ double IEEEFloat::convertToDouble() const { return api.bitsToDouble(); } +#ifdef HAS_IEE754_FLOAT128 +float128 IEEEFloat::convertToQuad() const { + assert(semantics == (const llvm::fltSemantics *)&semIEEEquad && + "Float semantics are not IEEEquads"); + APInt api = bitcastToAPInt(); + return api.bitsToQuad(); +} +#endif + /// Integer bit is explicit in this format. Intel hardware (387 and later) /// does not support these bit patterns: /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") @@ -5260,6 +5269,21 @@ double APFloat::convertToDouble() const { return Temp.getIEEE().convertToDouble(); } +#ifdef HAS_IEE754_FLOAT128 +float128 APFloat::convertToQuad() const { + if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad) + return getIEEE().convertToQuad(); + assert(getSemantics().isRepresentableBy(semIEEEquad) && + "Float semantics is not representable by IEEEquad"); + APFloat Temp = *this; + bool LosesInfo; + opStatus St = Temp.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo); + assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); + (void)St; + return Temp.getIEEE().convertToQuad(); +} +#endif + float APFloat::convertToFloat() const { if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle) return getIEEE().convertToFloat(); diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index 1c7f6b870d390..3f717c8a60050 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -52,6 +52,19 @@ def ext_uaddv_to_uaddlv : GICombineRule< (apply [{ applyExtUaddvToUaddlv(*${root}, MRI, B, Observer, ${matchinfo}); }]) >; +class push_opcode_through_ext : GICombineRule < + (defs root:$root), + (match (extOpcode $ext1, $src1):$ExtMI, + (extOpcode $ext2, $src2), + (opcode $dst, $ext1, $ext2):$root, + [{ return matchPushAddSubExt(*${root}, MRI, ${dst}.getReg(), ${src1}.getReg(), 
${src2}.getReg()); }]), + (apply [{ applyPushAddSubExt(*${root}, MRI, B, ${ExtMI}->getOpcode() == TargetOpcode::G_SEXT, ${dst}.getReg(), ${src1}.getReg(), ${src2}.getReg()); }])>; + +def push_sub_through_zext : push_opcode_through_ext; +def push_add_through_zext : push_opcode_through_ext; +def push_sub_through_sext : push_opcode_through_ext; +def push_add_through_sext : push_opcode_through_ext; + def AArch64PreLegalizerCombiner: GICombiner< "AArch64PreLegalizerCombinerImpl", [all_combines, fconstant_to_constant, @@ -59,7 +72,11 @@ def AArch64PreLegalizerCombiner: GICombiner< fold_global_offset, shuffle_to_extract, ext_addv_to_udot_addv, - ext_uaddv_to_uaddlv]> { + ext_uaddv_to_uaddlv, + push_sub_through_zext, + push_add_through_zext, + push_sub_through_sext, + push_add_through_sext]> { let CombineAllMethodName = "tryCombineAllImpl"; } @@ -265,6 +282,14 @@ def or_to_bsp: GICombineRule < (apply [{ applyOrToBSP(*${root}, MRI, B, ${matchinfo}); }]) >; +// Combines Mul(And(Srl(X, 15), 0x10001), 0xffff) into CMLTz +def combine_mul_cmlt : GICombineRule< + (defs root:$root, register_matchinfo:$matchinfo), + (match (wip_match_opcode G_MUL):$root, + [{ return matchCombineMulCMLT(*${root}, MRI, ${matchinfo}); }]), + (apply [{ applyCombineMulCMLT(*${root}, MRI, B, ${matchinfo}); }]) +>; + // Post-legalization combines which should happen at all optimization levels. // (E.g. ones that facilitate matching for the selector) For example, matching // pseudos. 
@@ -296,5 +321,6 @@ def AArch64PostLegalizerCombiner split_store_zero_128, undef_combines, select_to_minmax, or_to_bsp, combine_concat_vector, commute_constant_to_rhs, - push_freeze_to_prevent_poison_from_propagating]> { + push_freeze_to_prevent_poison_from_propagating, + combine_mul_cmlt]> { } diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 25f2e4d7c4de6..8fd58f4698d28 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -5717,6 +5717,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { case Intrinsic::aarch64_sve_ucvtf_x4: SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS); return; + case Intrinsic::aarch64_sve_fcvt_widen_x2: + SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S); + return; + case Intrinsic::aarch64_sve_fcvtl_widen_x2: + SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S); + return; case Intrinsic::aarch64_sve_sclamp_single_x2: if (auto Op = SelectOpcodeFromVT( Node->getValueType(0), @@ -5738,6 +5744,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { AArch64::FCLAMP_VG2_2Z2Z_D})) SelectClamp(Node, 2, Op); return; + case Intrinsic::aarch64_sve_bfclamp_single_x2: + SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H); + return; case Intrinsic::aarch64_sve_sclamp_single_x4: if (auto Op = SelectOpcodeFromVT( Node->getValueType(0), @@ -5759,6 +5768,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { AArch64::FCLAMP_VG4_4Z4Z_D})) SelectClamp(Node, 4, Op); return; + case Intrinsic::aarch64_sve_bfclamp_single_x4: + SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H); + return; case Intrinsic::aarch64_sve_add_single_x2: if (auto Op = SelectOpcodeFromVT( Node->getValueType(0), diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 25ba8d8500306..3e2a5bfbc2321 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -360,24 +360,24 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, if (Subtarget->hasNEON()) { addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass); addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass); - // Someone set us up the NEON. - addDRTypeForNEON(MVT::v2f32); - addDRTypeForNEON(MVT::v8i8); - addDRTypeForNEON(MVT::v4i16); - addDRTypeForNEON(MVT::v2i32); - addDRTypeForNEON(MVT::v1i64); - addDRTypeForNEON(MVT::v1f64); - addDRTypeForNEON(MVT::v4f16); - addDRTypeForNEON(MVT::v4bf16); - - addQRTypeForNEON(MVT::v4f32); - addQRTypeForNEON(MVT::v2f64); - addQRTypeForNEON(MVT::v16i8); - addQRTypeForNEON(MVT::v8i16); - addQRTypeForNEON(MVT::v4i32); - addQRTypeForNEON(MVT::v2i64); - addQRTypeForNEON(MVT::v8f16); - addQRTypeForNEON(MVT::v8bf16); + + addDRType(MVT::v2f32); + addDRType(MVT::v8i8); + addDRType(MVT::v4i16); + addDRType(MVT::v2i32); + addDRType(MVT::v1i64); + addDRType(MVT::v1f64); + addDRType(MVT::v4f16); + addDRType(MVT::v4bf16); + + addQRType(MVT::v4f32); + addQRType(MVT::v2f64); + addQRType(MVT::v16i8); + addQRType(MVT::v8i16); + addQRType(MVT::v4i32); + addQRType(MVT::v2i64); + addQRType(MVT::v8f16); + addQRType(MVT::v8bf16); } if (Subtarget->hasSVEorSME()) { @@ -1125,7 +1125,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - if (Subtarget->hasNEON()) { + if (Subtarget->isNeonAvailable()) { // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to // silliness like this: for (auto Op : @@ -1337,6 +1337,24 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // FADDP custom lowering for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 }) setOperationAction(ISD::FADD, VT, Custom); + } else /* !isNeonAvailable */ { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { + for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) + setOperationAction(Op, VT, 
Expand); + + if (VT.is128BitVector() || VT.is64BitVector()) { + setOperationAction(ISD::LOAD, VT, Legal); + setOperationAction(ISD::STORE, VT, Legal); + setOperationAction(ISD::BITCAST, VT, + Subtarget->isLittleEndian() ? Legal : Expand); + } + for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { + setTruncStoreAction(VT, InnerVT, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); + } + } } if (Subtarget->hasSME()) { @@ -2020,14 +2038,16 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { setOperationAction(ISD::ZERO_EXTEND, VT, Default); } -void AArch64TargetLowering::addDRTypeForNEON(MVT VT) { +void AArch64TargetLowering::addDRType(MVT VT) { addRegisterClass(VT, &AArch64::FPR64RegClass); - addTypeForNEON(VT); + if (Subtarget->isNeonAvailable()) + addTypeForNEON(VT); } -void AArch64TargetLowering::addQRTypeForNEON(MVT VT) { +void AArch64TargetLowering::addQRType(MVT VT) { addRegisterClass(VT, &AArch64::FPR128RegClass); - addTypeForNEON(VT); + if (Subtarget->isNeonAvailable()) + addTypeForNEON(VT); } EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, @@ -9445,7 +9465,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { - if (!Subtarget->hasNEON()) + if (!Subtarget->isNeonAvailable() && + !Subtarget->useSVEForFixedLengthVectors()) return SDValue(); EVT VT = Op.getValueType(); @@ -14141,6 +14162,13 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi); } +bool AArch64TargetLowering::shouldExpandBuildVectorWithShuffles( + EVT VT, unsigned DefinedValues) const { + if (!Subtarget->isNeonAvailable()) + return false; + return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); +} + bool 
AArch64TargetLowering::isShuffleMaskLegal(ArrayRef M, EVT VT) const { // Currently no fixed length shuffles that require SVE are legal. if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) @@ -14337,7 +14365,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, unsigned Opc = (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR; return DAG.getNode(Opc, DL, VT, Op.getOperand(0), - DAG.getConstant(Cnt, DL, MVT::i32)); + DAG.getConstant(Cnt, DL, MVT::i32), Op->getFlags()); } // Right shift register. Note, there is not a shift right register @@ -19838,7 +19866,8 @@ performSVEMulAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // help, for example, to produce ssra from sshr+add. static SDValue performAddSubIntoVectorOp(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); - if (VT != MVT::i64) + if (VT != MVT::i64 || + DAG.getTargetLoweringInfo().isOperationExpand(N->getOpcode(), MVT::v1i64)) return SDValue(); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index a44a3d35d2f9c..73bc9ad53bb8a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -1017,8 +1017,10 @@ class AArch64TargetLowering : public TargetLowering { void addTypeForNEON(MVT VT); void addTypeForFixedLengthSVE(MVT VT); - void addDRTypeForNEON(MVT VT); - void addQRTypeForNEON(MVT VT); + void addDRType(MVT VT); + void addQRType(MVT VT); + + bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override; unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 4830033b23527..dd54520c8ddad 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -733,6 
+733,12 @@ def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>; def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>; def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>; + +def AArch64vashr_exact : PatFrag<(ops node:$lhs, node:$rhs), + (AArch64vashr node:$lhs, node:$rhs), [{ + return N->getFlags().hasExact(); +}]>; + def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>; def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>; def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>; @@ -7710,6 +7716,25 @@ defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf", defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", AArch64rshrn>; defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>; +let Predicates = [HasNEON] in { +def : Pat<(v2f32 (sint_to_fp (v2i32 (AArch64vashr_exact v2i32:$Vn, i32:$shift)))), + (SCVTFv2i32_shift $Vn, vecshiftR32:$shift)>; + +def : Pat<(v4f32 (sint_to_fp (v4i32 (AArch64vashr_exact v4i32:$Vn, i32:$shift)))), + (SCVTFv4i32_shift $Vn, vecshiftR32:$shift)>; + +def : Pat<(v2f64 (sint_to_fp (v2i64 (AArch64vashr_exact v2i64:$Vn, i32:$shift)))), + (SCVTFv2i64_shift $Vn, vecshiftR64:$shift)>; +} + +let Predicates = [HasNEON, HasFullFP16] in { +def : Pat<(v4f16 (sint_to_fp (v4i16 (AArch64vashr_exact v4i16:$Vn, i32:$shift)))), + (SCVTFv4i16_shift $Vn, vecshiftR16:$shift)>; + +def : Pat<(v8f16 (sint_to_fp (v8i16 (AArch64vashr_exact v8i16:$Vn, i32:$shift)))), + (SCVTFv8i16_shift $Vn, vecshiftR16:$shift)>; +} + // X << 1 ==> X + X class SHLToADDPat : Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))), diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 3b3c1fc8b27bf..4a7c82b393c10 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -227,6 +227,8 @@ class AArch64InstructionSelector : public 
InstructionSelector { bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI); bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI); bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI); + void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs, + unsigned Opc1, unsigned Opc2, bool isExt); bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI); bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI); @@ -6537,6 +6539,25 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, I.eraseFromParent(); return true; } + case Intrinsic::aarch64_neon_tbl2: + SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false); + return true; + case Intrinsic::aarch64_neon_tbl3: + SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three, + false); + return true; + case Intrinsic::aarch64_neon_tbl4: + SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false); + return true; + case Intrinsic::aarch64_neon_tbx2: + SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true); + return true; + case Intrinsic::aarch64_neon_tbx3: + SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true); + return true; + case Intrinsic::aarch64_neon_tbx4: + SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true); + return true; case Intrinsic::swift_async_context_addr: auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()}, {Register(AArch64::FP)}) @@ -6552,6 +6573,30 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, return false; } +void AArch64InstructionSelector::SelectTable(MachineInstr &I, + MachineRegisterInfo &MRI, + unsigned NumVec, unsigned Opc1, + unsigned Opc2, bool isExt) { + Register DstReg = I.getOperand(0).getReg(); + unsigned Opc = MRI.getType(DstReg) == LLT::fixed_vector(8, 8) ? 
Opc1 : Opc2; + + // Create the REG_SEQUENCE + SmallVector Regs; + for (unsigned i = 0; i < NumVec; i++) + Regs.push_back(I.getOperand(i + 2 + isExt).getReg()); + Register RegSeq = createQTuple(Regs, MIB); + + Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg(); + MachineInstrBuilder Instr; + if (isExt) { + Register Reg = I.getOperand(2).getReg(); + Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg}); + } else + Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg}); + constrainSelectedInstRegOperands(*Instr, TII, TRI, RBI); + I.eraseFromParent(); +} + InstructionSelector::ComplexRendererFns AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const { auto MaybeImmed = getImmedFromMO(Root); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp index d8ca5494ba50a..7f3e0e01ccd25 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -381,6 +381,61 @@ void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI, MI.eraseFromParent(); } +// Combines Mul(And(Srl(X, 15), 0x10001), 0xffff) into CMLTz +bool matchCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI, + Register &SrcReg) { + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + if (DstTy != LLT::fixed_vector(2, 64) && DstTy != LLT::fixed_vector(2, 32) && + DstTy != LLT::fixed_vector(4, 32) && DstTy != LLT::fixed_vector(4, 16) && + DstTy != LLT::fixed_vector(8, 16)) + return false; + + auto AndMI = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI); + if (AndMI->getOpcode() != TargetOpcode::G_AND) + return false; + auto LShrMI = getDefIgnoringCopies(AndMI->getOperand(1).getReg(), MRI); + if (LShrMI->getOpcode() != TargetOpcode::G_LSHR) + return false; + + // Check the constant splat values + auto V1 = isConstantOrConstantSplatVector( + *MRI.getVRegDef(MI.getOperand(2).getReg()), MRI); + 
auto V2 = isConstantOrConstantSplatVector( + *MRI.getVRegDef(AndMI->getOperand(2).getReg()), MRI); + auto V3 = isConstantOrConstantSplatVector( + *MRI.getVRegDef(LShrMI->getOperand(2).getReg()), MRI); + if (!V1.has_value() || !V2.has_value() || !V3.has_value()) + return false; + unsigned HalfSize = DstTy.getScalarSizeInBits() / 2; + if (!V1.value().isMask(HalfSize) || V2.value() != (1ULL | 1ULL << HalfSize) || + V3 != (HalfSize - 1)) + return false; + + SrcReg = LShrMI->getOperand(1).getReg(); + + return true; +} + +void applyCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, Register &SrcReg) { + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT HalfTy = + DstTy.changeElementCount(DstTy.getElementCount().multiplyCoefficientBy(2)) + .changeElementSize(DstTy.getScalarSizeInBits() / 2); + + Register ZeroVec = B.buildConstant(HalfTy, 0).getReg(0); + Register CastReg = + B.buildInstr(TargetOpcode::G_BITCAST, {HalfTy}, {SrcReg}).getReg(0); + Register CMLTReg = + B.buildICmp(CmpInst::Predicate::ICMP_SLT, HalfTy, CastReg, ZeroVec) + .getReg(0); + + B.buildInstr(TargetOpcode::G_BITCAST, {DstReg}, {CMLTReg}).getReg(0); + MI.eraseFromParent(); +} + class AArch64PostLegalizerCombinerImpl : public Combiner { protected: // TODO: Make CombinerHelper methods const. 
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp index a82d3cd095659..0f89fa557cd57 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp @@ -554,6 +554,57 @@ void applyExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI, MI.eraseFromParent(); } +// Pushes ADD/SUB through extend instructions to decrease the number of extend +// instruction at the end by allowing selection of {s|u}addl sooner + +// i32 add(i32 ext i8, i32 ext i8) => i32 ext(i16 add(i16 ext i8, i16 ext i8)) +bool matchPushAddSubExt(MachineInstr &MI, MachineRegisterInfo &MRI, + Register DstReg, Register SrcReg1, Register SrcReg2) { + assert(MI.getOpcode() == TargetOpcode::G_ADD || + MI.getOpcode() == TargetOpcode::G_SUB && + "Expected a G_ADD or G_SUB instruction\n"); + + // Deal with vector types only + LLT DstTy = MRI.getType(DstReg); + if (!DstTy.isVector()) + return false; + + // Return true if G_{S|Z}EXT instruction is more than 2* source + Register ExtDstReg = MI.getOperand(1).getReg(); + LLT Ext1SrcTy = MRI.getType(SrcReg1); + LLT Ext2SrcTy = MRI.getType(SrcReg2); + unsigned ExtDstScal = MRI.getType(ExtDstReg).getScalarSizeInBits(); + unsigned Ext1SrcScal = Ext1SrcTy.getScalarSizeInBits(); + if (((Ext1SrcScal == 8 && ExtDstScal == 32) || + ((Ext1SrcScal == 8 || Ext1SrcScal == 16) && ExtDstScal == 64)) && + Ext1SrcTy == Ext2SrcTy) + return true; + + return false; +} + +void applyPushAddSubExt(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, bool isSExt, Register DstReg, + Register SrcReg1, Register SrcReg2) { + LLT SrcTy = MRI.getType(SrcReg1); + LLT MidTy = SrcTy.changeElementSize(SrcTy.getScalarSizeInBits() * 2); + unsigned Opc = isSExt ? 
TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; + Register Ext1Reg = B.buildInstr(Opc, {MidTy}, {SrcReg1}).getReg(0); + Register Ext2Reg = B.buildInstr(Opc, {MidTy}, {SrcReg2}).getReg(0); + Register AddReg = + B.buildInstr(MI.getOpcode(), {MidTy}, {Ext1Reg, Ext2Reg}).getReg(0); + + // G_SUB has to sign-extend the result. + // G_ADD needs to sext from sext and can sext or zext from zext, so the + // original opcode is used. + if (MI.getOpcode() == TargetOpcode::G_ADD) + B.buildInstr(Opc, {DstReg}, {AddReg}); + else + B.buildSExt(DstReg, AddReg); + + MI.eraseFromParent(); +} + bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B, CombinerHelper &Helper, GISelChangeObserver &Observer) { // Try simplify G_UADDO with 8 or 16 bit operands to wide G_ADD and TBNZ if diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 50ee37b0dfebc..b21b1faf5c962 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -104,6 +104,13 @@ class sme2_move_to_tile_pseudo + : SMEPseudo2Instr, + Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rs, index_ty:$imm), []> { + let SMEMatrixType = za_flag; + let usesCustomInserter = 1; +} + //===----------------------------------------------------------------------===// // SME pattern match helpers. //===----------------------------------------------------------------------===// @@ -189,6 +196,9 @@ class SME2_Tile_VG4_Multi_Pat(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>; +class SME2_Zero_Matrix_Pat + : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset))), + (!cast(name) $base, $offset)>; //===----------------------------------------------------------------------===// // SME pattern match helpers. 
//===----------------------------------------------------------------------===// @@ -4815,39 +4825,57 @@ class sme2p1_zero_matrix opc, Operand index_ty, string mnemonic, } multiclass sme2p1_zero_matrix { - def _VG2_Z : sme2p1_zero_matrix<{0b000,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx2"> { + def _VG2_Z : sme2p1_zero_matrix<{0b000,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx2">, SMEPseudo2Instr { bits<3> imm; let Inst{2-0} = imm; } - def _2Z : sme2p1_zero_matrix<{0b001,?,?,?}, uimm3s2range, mnemonic> { + def _2Z : sme2p1_zero_matrix<{0b001,?,?,?}, uimm3s2range, mnemonic>, SMEPseudo2Instr { bits<3> imm; let Inst{2-0} = imm; } - def _VG2_2Z : sme2p1_zero_matrix<{0b0100,?,?}, uimm2s2range, mnemonic, "vgx2"> { + def _VG2_2Z : sme2p1_zero_matrix<{0b0100,?,?}, uimm2s2range, mnemonic, "vgx2">, SMEPseudo2Instr { bits<2> imm; let Inst{1-0} = imm; } - def _VG4_2Z : sme2p1_zero_matrix<{0b0110,?,?}, uimm2s2range, mnemonic, "vgx4"> { + def _VG4_2Z : sme2p1_zero_matrix<{0b0110,?,?}, uimm2s2range, mnemonic, "vgx4">, SMEPseudo2Instr { bits<2> imm; let Inst{1-0} = imm; } - def _VG4_Z : sme2p1_zero_matrix<{0b100,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx4"> { + def _VG4_Z : sme2p1_zero_matrix<{0b100,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx4">, SMEPseudo2Instr { bits<3> imm; let Inst{2-0} = imm; } - def _4Z : sme2p1_zero_matrix<{0b1010,?,?}, uimm2s4range, mnemonic> { + def _4Z : sme2p1_zero_matrix<{0b1010,?,?}, uimm2s4range, mnemonic>, SMEPseudo2Instr { bits<2> imm; let Inst{1-0} = imm; } - def _VG2_4Z :sme2p1_zero_matrix<{0b11000,?}, uimm1s4range, mnemonic, "vgx2"> { + def _VG2_4Z : sme2p1_zero_matrix<{0b11000,?}, uimm1s4range, mnemonic, "vgx2">, SMEPseudo2Instr { bits<1> imm; let Inst{0} = imm; } - def _VG4_4Z :sme2p1_zero_matrix<{0b11100,?}, uimm1s4range, mnemonic, "vgx4"> { + def _VG4_4Z : sme2p1_zero_matrix<{0b11100,?}, uimm1s4range, mnemonic, "vgx4">, SMEPseudo2Instr { bits<1> imm; let Inst{0} = imm; } -} + + def NAME # _VG2_Z_PSEUDO : sem2p1_zero_matrix_pseudo; + def NAME # _VG4_Z_PSEUDO : 
sem2p1_zero_matrix_pseudo; + def NAME # _2Z_PSEUDO : sem2p1_zero_matrix_pseudo; + def NAME # _VG2_2Z_PSEUDO : sem2p1_zero_matrix_pseudo; + def NAME # _VG4_2Z_PSEUDO : sem2p1_zero_matrix_pseudo; + def NAME # _4Z_PSEUDO : sem2p1_zero_matrix_pseudo; + def NAME # _VG2_4Z_PSEUDO : sem2p1_zero_matrix_pseudo; + def NAME # _VG4_4Z_PSEUDO : sem2p1_zero_matrix_pseudo; + + def : SME2_Zero_Matrix_Pat; + def : SME2_Zero_Matrix_Pat; + def : SME2_Zero_Matrix_Pat; + def : SME2_Zero_Matrix_Pat; + def : SME2_Zero_Matrix_Pat; + def : SME2_Zero_Matrix_Pat; + def : SME2_Zero_Matrix_Pat; + def : SME2_Zero_Matrix_Pat; +} //===----------------------------------------------------------------------===// // SME2.1 lookup table expand two non-contiguous registers diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 351263d079768..24f9a6e375baa 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -494,7 +494,7 @@ class MIMG_NoSampler_nsa_gfx11 - : VIMAGE_gfx12 { + : VIMAGE_gfx12 { let InOperandList = !con(AddrIns, (ins SReg_256:$rsrc, DMask:$dmask, Dim:$dim, CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe), diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index eae666ab0e7d7..97a8ff4486609 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -579,6 +579,7 @@ Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg( (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) { MRI.replaceRegWith(ScratchRsrcReg, Reg); MFI->setScratchRSrcReg(Reg); + MRI.reserveReg(Reg, TRI); return Reg; } } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index ddb5f71935685..4b5f9bdd82b8d 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -2083,6 +2083,9 @@ bool 
SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?"); + assert(MF->getRegInfo().isReserved(MFI->getScratchRSrcReg()) && + "unreserved scratch RSRC register"); + MachineOperand &FIOp = MI->getOperand(FIOperandNum); int Index = MI->getOperand(FIOperandNum).getIndex(); diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index ea8109bbee9ae..09dc1c781e2f3 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -278,11 +278,10 @@ LLVM_DUMP_METHOD void SIWholeQuadMode::printInfo() { for (const MachineInstr &MI : *BII.first) { auto III = Instructions.find(&MI); - if (III == Instructions.end()) - continue; - - dbgs() << " " << MI << " Needs = " << PrintState(III->second.Needs) - << ", OutNeeds = " << PrintState(III->second.OutNeeds) << '\n'; + if (III != Instructions.end()) { + dbgs() << " " << MI << " Needs = " << PrintState(III->second.Needs) + << ", OutNeeds = " << PrintState(III->second.OutNeeds) << '\n'; + } } } } @@ -455,10 +454,8 @@ void SIWholeQuadMode::markOperand(const MachineInstr &MI, for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) { LiveRange &LR = LIS->getRegUnit(Unit); const VNInfo *Value = LR.Query(LIS->getInstructionIndex(MI)).valueIn(); - if (!Value) - continue; - - markDefs(MI, LR, Unit, AMDGPU::NoSubRegister, Flag, Worklist); + if (Value) + markDefs(MI, LR, Unit, AMDGPU::NoSubRegister, Flag, Worklist); } } } @@ -499,19 +496,16 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, if (TII->isWQM(Opcode)) { // If LOD is not supported WQM is not needed. - if (!ST->hasExtendedImageInsts()) - continue; // Only generate implicit WQM if implicit derivatives are required. // This avoids inserting unintended WQM if a shader type without // implicit derivatives uses an image sampling instruction. 
- if (!HasImplicitDerivatives) - continue; - // Sampling instructions don't need to produce results for all pixels - // in a quad, they just require all inputs of a quad to have been - // computed for derivatives. - markInstructionUses(MI, StateWQM, Worklist); - GlobalFlags |= StateWQM; - continue; + if (ST->hasExtendedImageInsts() && HasImplicitDerivatives) { + // Sampling instructions don't need to produce results for all pixels + // in a quad, they just require all inputs of a quad to have been + // computed for derivatives. + markInstructionUses(MI, StateWQM, Worklist); + GlobalFlags |= StateWQM; + } } else if (Opcode == AMDGPU::WQM) { // The WQM intrinsic requires its output to have all the helper lanes // correct, so we need it to be in WQM. @@ -520,7 +514,6 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, } else if (Opcode == AMDGPU::SOFT_WQM) { LowerToCopyInstrs.push_back(&MI); SoftWQMInstrs.push_back(&MI); - continue; } else if (Opcode == AMDGPU::STRICT_WWM) { // The STRICT_WWM intrinsic doesn't make the same guarantee, and plus // it needs to be executed in WQM or Exact so that its copy doesn't @@ -528,7 +521,6 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, markInstructionUses(MI, StateStrictWWM, Worklist); GlobalFlags |= StateStrictWWM; LowerToMovInstrs.push_back(&MI); - continue; } else if (Opcode == AMDGPU::STRICT_WQM || TII->isDualSourceBlendEXP(MI)) { // STRICT_WQM is similar to STRICTWWM, but instead of enabling all @@ -551,7 +543,6 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, GlobalFlags |= StateExact; III.Disabled = StateWQM | StateStrict; } - continue; } else if (Opcode == AMDGPU::LDS_PARAM_LOAD || Opcode == AMDGPU::DS_PARAM_LOAD || Opcode == AMDGPU::LDS_DIRECT_LOAD || @@ -561,7 +552,6 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, InstrInfo &II = Instructions[&MI]; II.Needs |= StateStrictWQM; GlobalFlags |= StateStrictWQM; - continue; } else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 
|| Opcode == AMDGPU::V_SET_INACTIVE_B64) { III.Disabled = StateStrict; @@ -574,7 +564,6 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, } } SetInactiveInstrs.push_back(&MI); - continue; } else if (TII->isDisableWQM(MI)) { BBI.Needs |= StateExact; if (!(BBI.InNeeds & StateExact)) { @@ -583,40 +572,33 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, } GlobalFlags |= StateExact; III.Disabled = StateWQM | StateStrict; - continue; - } else { - if (Opcode == AMDGPU::SI_PS_LIVE || Opcode == AMDGPU::SI_LIVE_MASK) { - LiveMaskQueries.push_back(&MI); - } else if (Opcode == AMDGPU::SI_KILL_I1_TERMINATOR || - Opcode == AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR || - Opcode == AMDGPU::SI_DEMOTE_I1) { - KillInstrs.push_back(&MI); - BBI.NeedsLowering = true; - } else if (WQMOutputs) { - // The function is in machine SSA form, which means that physical - // VGPRs correspond to shader inputs and outputs. Inputs are - // only used, outputs are only defined. - // FIXME: is this still valid? - for (const MachineOperand &MO : MI.defs()) { - if (!MO.isReg()) - continue; - - Register Reg = MO.getReg(); - - if (!Reg.isVirtual() && - TRI->hasVectorRegisters(TRI->getPhysRegBaseClass(Reg))) { - Flags = StateWQM; - break; - } + } else if (Opcode == AMDGPU::SI_PS_LIVE || + Opcode == AMDGPU::SI_LIVE_MASK) { + LiveMaskQueries.push_back(&MI); + } else if (Opcode == AMDGPU::SI_KILL_I1_TERMINATOR || + Opcode == AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR || + Opcode == AMDGPU::SI_DEMOTE_I1) { + KillInstrs.push_back(&MI); + BBI.NeedsLowering = true; + } else if (WQMOutputs) { + // The function is in machine SSA form, which means that physical + // VGPRs correspond to shader inputs and outputs. Inputs are + // only used, outputs are only defined. + // FIXME: is this still valid? 
+ for (const MachineOperand &MO : MI.defs()) { + Register Reg = MO.getReg(); + if (Reg.isPhysical() && + TRI->hasVectorRegisters(TRI->getPhysRegBaseClass(Reg))) { + Flags = StateWQM; + break; } } - - if (!Flags) - continue; } - markInstruction(MI, Flags, Worklist); - GlobalFlags |= Flags; + if (Flags) { + markInstruction(MI, Flags, Worklist); + GlobalFlags |= Flags; + } } } @@ -1568,8 +1550,6 @@ void SIWholeQuadMode::lowerKillInstrs(bool IsWQM) { case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: SplitPoint = lowerKillF32(*MBB, *MI); break; - default: - continue; } if (SplitPoint) splitBlock(MBB, SplitPoint); diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index a3144109b7204..a46c383115e2d 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -251,6 +251,9 @@ namespace { SetVector &Predicates = PredicatedInsts[MI]; if (Exclusive && Predicates.size() != 1) return false; + // We do not know how to convert an else predicate of a VCTP. + if (getVPTInstrPredicate(*MI) == ARMVCC::Else) + return false; return llvm::any_of(Predicates, isVCTP); } @@ -305,8 +308,12 @@ namespace { // isn't predicated on entry, check whether the vctp is within the block // and that all other instructions are then predicated on it. 
for (auto &Block : Blocks) { - if (isEntryPredicatedOnVCTP(Block, false) || - hasImplicitlyValidVPT(Block, RDA)) + if (isEntryPredicatedOnVCTP(Block, false) && + !any_of(drop_begin(Block.getInsts()), [](const MachineInstr *MI) { + return getVPTInstrPredicate(*MI) == ARMVCC::Else; + })) + continue; + if (hasImplicitlyValidVPT(Block, RDA)) continue; SmallVectorImpl &Insts = Block.getInsts(); diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 639771ab9eabb..84ef582c029d3 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -296,8 +296,7 @@ def FeatureVectorsUseTwoUnits : SubtargetFeature<"vectors-use-two-units", def FeaturePrefixInstrs : SubtargetFeature<"prefix-instrs", "HasPrefixInstrs", "true", "Enable prefixed instructions", - [FeatureISA3_0, FeatureP8Vector, - FeatureP9Altivec]>; + [FeatureISA3_1]>; def FeaturePCRelativeMemops : SubtargetFeature<"pcrelative-memops", "HasPCRelativeMemops", "true", "Enable PC relative Memory Ops", diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 8450ce9e0e3b3..a0e91f4dc3a4a 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9460,7 +9460,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // double. This is to exploit the XXSPLTIDP instruction. // If we lose precision, we use XXSPLTI32DX. if (BVNIsConstantSplat && (SplatBitSize == 64) && - Subtarget.hasPrefixInstrs()) { + Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) { // Check the type first to short-circuit so we don't modify APSplatBits if // this block isn't executed. if ((Op->getValueType(0) == MVT::v2f64) && @@ -9605,11 +9605,11 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to // make a 4-byte splat element. 
For example: 2-byte splat of 0xABAB can be // turned into a 4-byte splat of 0xABABABAB. - if (Subtarget.hasPrefixInstrs() && SplatSize == 2) + if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2) return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2, Op.getValueType(), DAG, dl); - if (Subtarget.hasPrefixInstrs() && SplatSize == 4) + if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4) return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG, dl); @@ -10242,7 +10242,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); } - if (Subtarget.hasPrefixInstrs()) { + if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) { SDValue SplatInsertNode; if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG))) return SplatInsertNode; @@ -17730,7 +17730,7 @@ bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, return false; case MVT::f32: case MVT::f64: { - if (Subtarget.hasPrefixInstrs()) { + if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) { // we can materialize all immediatess via XXSPLTI32DX and XXSPLTIDP. return true; } @@ -18314,11 +18314,12 @@ unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N, // Compute subtarget flags. 
if (!Subtarget.hasP9Vector()) FlagSet |= PPC::MOF_SubtargetBeforeP9; - else { + else FlagSet |= PPC::MOF_SubtargetP9; - if (Subtarget.hasPrefixInstrs()) - FlagSet |= PPC::MOF_SubtargetP10; - } + + if (Subtarget.hasPrefixInstrs()) + FlagSet |= PPC::MOF_SubtargetP10; + if (Subtarget.hasSPE()) FlagSet |= PPC::MOF_SubtargetSPE; diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td index 5f2937d47a519..2fd5978a23c80 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrP10.td +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -654,13 +654,10 @@ let Predicates = [PrefixInstrs] in { (ins s34imm:$SI), "pli $RT, $SI", IIC_IntSimple, []>; } +} +let Predicates = [PrefixInstrs, HasFPU] in { let mayLoad = 1, mayStore = 0 in { - defm PLXV : - 8LS_DForm_R_SI34_XT6_RA5_MEM_p<25, (outs vsrc:$XST), (ins (memri34 $D, $RA):$addr), - (ins (memri34_pcrel $D, $RA):$addr), - (ins s34imm_pcrel:$D), - "plxv $XST, $addr", "plxv $XST, $D", IIC_LdStLFD>; defm PLFS : MLS_DForm_R_SI34_RTA5_MEM_p<48, (outs f4rc:$RST), (ins (memri34 $D, $RA):$addr), (ins (memri34_pcrel $D, $RA):$addr), @@ -671,6 +668,28 @@ let Predicates = [PrefixInstrs] in { (ins (memri34_pcrel $D, $RA):$addr), (ins s34imm_pcrel:$D), "plfd $RST, $addr", "plfd $RST, $D", IIC_LdStLFD>; + } + let mayStore = 1, mayLoad = 0 in { + defm PSTFS : + MLS_DForm_R_SI34_RTA5_MEM_p<52, (outs), (ins f4rc:$RST, (memri34 $D, $RA):$addr), + (ins f4rc:$RST, (memri34_pcrel $D, $RA):$addr), + (ins f4rc:$RST, s34imm_pcrel:$D), + "pstfs $RST, $addr", "pstfs $RST, $D", IIC_LdStLFD>; + defm PSTFD : + MLS_DForm_R_SI34_RTA5_MEM_p<54, (outs), (ins f8rc:$RST, (memri34 $D, $RA):$addr), + (ins f8rc:$RST, (memri34_pcrel $D, $RA):$addr), + (ins f8rc:$RST, s34imm_pcrel:$D), + "pstfd $RST, $addr", "pstfd $RST, $D", IIC_LdStLFD>; + } +} + +let Predicates = [PrefixInstrs, HasP10Vector] in { + let mayLoad = 1, mayStore = 0 in { + defm PLXV : + 8LS_DForm_R_SI34_XT6_RA5_MEM_p<25, (outs vsrc:$XST), (ins (memri34 $D, $RA):$addr), + (ins 
(memri34_pcrel $D, $RA):$addr), + (ins s34imm_pcrel:$D), + "plxv $XST, $addr", "plxv $XST, $D", IIC_LdStLFD>; defm PLXSSP : 8LS_DForm_R_SI34_RTA5_MEM_p<43, (outs vfrc:$RST), (ins (memri34 $D, $RA):$addr), (ins (memri34_pcrel $D, $RA):$addr), @@ -683,6 +702,28 @@ let Predicates = [PrefixInstrs] in { (ins s34imm_pcrel:$D), "plxsd $RST, $addr", "plxsd $RST, $D", IIC_LdStLFD>; + } + let mayStore = 1, mayLoad = 0 in { + defm PSTXV : + 8LS_DForm_R_SI34_XT6_RA5_MEM_p<27, (outs), (ins vsrc:$XST, (memri34 $D, $RA):$addr), + (ins vsrc:$XST, (memri34_pcrel $D, $RA):$addr), + (ins vsrc:$XST, s34imm_pcrel:$D), + "pstxv $XST, $addr", "pstxv $XST, $D", IIC_LdStLFD>; + defm PSTXSSP : + 8LS_DForm_R_SI34_RTA5_MEM_p<47, (outs), (ins vfrc:$RST, (memri34 $D, $RA):$addr), + (ins vfrc:$RST, (memri34_pcrel $D, $RA):$addr), + (ins vfrc:$RST, s34imm_pcrel:$D), + "pstxssp $RST, $addr", "pstxssp $RST, $D", IIC_LdStLFD>; + defm PSTXSD : + 8LS_DForm_R_SI34_RTA5_MEM_p<46, (outs), (ins vfrc:$RST, (memri34 $D, $RA):$addr), + (ins vfrc:$RST, (memri34_pcrel $D, $RA):$addr), + (ins vfrc:$RST, s34imm_pcrel:$D), + "pstxsd $RST, $addr", "pstxsd $RST, $D", IIC_LdStLFD>; + } +} + +let Predicates = [PrefixInstrs] in { + let mayLoad = 1, mayStore = 0 in { let Interpretation64Bit = 1, isCodeGenOnly = 1 in { defm PLBZ8 : MLS_DForm_R_SI34_RTA5_MEM_p<34, (outs g8rc:$RST), (ins (memri34 $D, $RA):$addr), @@ -745,31 +786,6 @@ let Predicates = [PrefixInstrs] in { } let mayStore = 1, mayLoad = 0 in { - defm PSTXV : - 8LS_DForm_R_SI34_XT6_RA5_MEM_p<27, (outs), (ins vsrc:$XST, (memri34 $D, $RA):$addr), - (ins vsrc:$XST, (memri34_pcrel $D, $RA):$addr), - (ins vsrc:$XST, s34imm_pcrel:$D), - "pstxv $XST, $addr", "pstxv $XST, $D", IIC_LdStLFD>; - defm PSTFS : - MLS_DForm_R_SI34_RTA5_MEM_p<52, (outs), (ins f4rc:$RST, (memri34 $D, $RA):$addr), - (ins f4rc:$RST, (memri34_pcrel $D, $RA):$addr), - (ins f4rc:$RST, s34imm_pcrel:$D), - "pstfs $RST, $addr", "pstfs $RST, $D", IIC_LdStLFD>; - defm PSTFD : - 
MLS_DForm_R_SI34_RTA5_MEM_p<54, (outs), (ins f8rc:$RST, (memri34 $D, $RA):$addr), - (ins f8rc:$RST, (memri34_pcrel $D, $RA):$addr), - (ins f8rc:$RST, s34imm_pcrel:$D), - "pstfd $RST, $addr", "pstfd $RST, $D", IIC_LdStLFD>; - defm PSTXSSP : - 8LS_DForm_R_SI34_RTA5_MEM_p<47, (outs), (ins vfrc:$RST, (memri34 $D, $RA):$addr), - (ins vfrc:$RST, (memri34_pcrel $D, $RA):$addr), - (ins vfrc:$RST, s34imm_pcrel:$D), - "pstxssp $RST, $addr", "pstxssp $RST, $D", IIC_LdStLFD>; - defm PSTXSD : - 8LS_DForm_R_SI34_RTA5_MEM_p<46, (outs), (ins vfrc:$RST, (memri34 $D, $RA):$addr), - (ins vfrc:$RST, (memri34_pcrel $D, $RA):$addr), - (ins vfrc:$RST, s34imm_pcrel:$D), - "pstxsd $RST, $addr", "pstxsd $RST, $D", IIC_LdStLFD>; let Interpretation64Bit = 1, isCodeGenOnly = 1 in { defm PSTB8 : MLS_DForm_R_SI34_RTA5_MEM_p<38, (outs), (ins g8rc:$RST, (memri34 $D, $RA):$addr), @@ -1136,7 +1152,7 @@ let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops] in { []>; } -let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops, PrefixInstrs] in { +let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops, PrefixInstrs, HasP10Vector] in { defm PLXVP : 8LS_DForm_R_XTp5_SI34_MEM_p<58, (outs vsrprc:$XTp), (ins (memri34 $D, $RA):$addr), (ins (memri34_pcrel $D, $RA):$addr), @@ -1145,7 +1161,7 @@ let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops, PrefixInstrs] i IIC_LdStLFD>; } -let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops, PrefixInstrs] in { +let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops, PrefixInstrs, HasP10Vector] in { defm PSTXVP : 8LS_DForm_R_XTp5_SI34_MEM_p<62, (outs), (ins vsrprc:$XTp, (memri34 $D, $RA):$addr), (ins vsrprc:$XTp, (memri34_pcrel $D, $RA):$addr), @@ -1157,7 +1173,7 @@ let Predicates = [PairedVectorMemops] in { // Intrinsics for Paired Vector Loads. 
def : Pat<(v256i1 (int_ppc_vsx_lxvp DQForm:$src)), (LXVP memrix16:$src)>; def : Pat<(v256i1 (int_ppc_vsx_lxvp XForm:$src)), (LXVPX XForm:$src)>; - let Predicates = [PairedVectorMemops, PrefixInstrs] in { + let Predicates = [PairedVectorMemops, PrefixInstrs, HasP10Vector] in { def : Pat<(v256i1 (int_ppc_vsx_lxvp PDForm:$src)), (PLXVP memri34:$src)>; } // Intrinsics for Paired Vector Stores. @@ -1165,7 +1181,7 @@ let Predicates = [PairedVectorMemops] in { (STXVP $XSp, memrix16:$dst)>; def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, XForm:$dst), (STXVPX $XSp, XForm:$dst)>; - let Predicates = [PairedVectorMemops, PrefixInstrs] in { + let Predicates = [PairedVectorMemops, PrefixInstrs, HasP10Vector] in { def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, PDForm:$dst), (PSTXVP $XSp, memri34:$dst)>; } @@ -1236,6 +1252,9 @@ let Predicates = [PCRelativeMemops] in { def : Pat<(store i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)), (PSTDpc $RS, $ga, 0)>; +} + +let Predicates = [PCRelativeMemops, HasFPU] in { // Load f32 def : Pat<(f32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLFSpc $addr, 0)>; @@ -1252,6 +1271,11 @@ let Predicates = [PCRelativeMemops] in { def : Pat<(store f64:$FRS, (PPCmatpcreladdr PCRelForm:$ga)), (PSTFDpc $FRS, $ga, 0)>; + def : Pat<(v4f32 (PPCldvsxlh (PPCmatpcreladdr PCRelForm:$addr))), + (SUBREG_TO_REG (i64 1), (PLFDpc $addr, 0), sub_64)>; +} + +let Predicates = [PCRelativeMemops, HasP10Vector] in { // Load f128 def : Pat<(f128 (load (PPCmatpcreladdr PCRelForm:$addr))), (COPY_TO_REGCLASS (PLXVpc $addr, 0), VRRC)>; @@ -1288,6 +1312,14 @@ let Predicates = [PCRelativeMemops] in { def : Pat<(store v2f64:$XS, (PPCmatpcreladdr PCRelForm:$ga)), (PSTXVpc $XS, $ga, 0)>; + // Special Cases For PPCstore_scal_int_from_vsr + def : Pat<(PPCstore_scal_int_from_vsr f64:$src, (PPCmatpcreladdr PCRelForm:$dst), 8), + (PSTXSDpc $src, $dst, 0)>; + def : Pat<(PPCstore_scal_int_from_vsr f128:$src, (PPCmatpcreladdr PCRelForm:$dst), 8), + (PSTXSDpc (COPY_TO_REGCLASS $src, VFRC), $dst, 0)>; +} + 
+let Predicates = [PCRelativeMemops] in { // Atomic Load def : Pat<(i32 (atomic_load_8 (PPCmatpcreladdr PCRelForm:$ga))), (PLBZpc $ga, 0)>; @@ -1314,15 +1346,6 @@ let Predicates = [PCRelativeMemops] in { def : Pat<(atomic_store_64 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)), (PSTDpc $RS, $ga, 0)>; - // Special Cases For PPCstore_scal_int_from_vsr - def : Pat<(PPCstore_scal_int_from_vsr f64:$src, (PPCmatpcreladdr PCRelForm:$dst), 8), - (PSTXSDpc $src, $dst, 0)>; - def : Pat<(PPCstore_scal_int_from_vsr f128:$src, (PPCmatpcreladdr PCRelForm:$dst), 8), - (PSTXSDpc (COPY_TO_REGCLASS $src, VFRC), $dst, 0)>; - - def : Pat<(v4f32 (PPCldvsxlh (PPCmatpcreladdr PCRelForm:$addr))), - (SUBREG_TO_REG (i64 1), (PLFDpc $addr, 0), sub_64)>; - // If the PPCmatpcreladdr node is not caught by any other pattern it should be // caught here and turned into a paddi instruction to materialize the address. def : Pat<(PPCmatpcreladdr PCRelForm:$addr), (PADDI8pc 0, $addr)>; @@ -1335,7 +1358,7 @@ let Predicates = [PCRelativeMemops] in { (PADDI8 $in, $addr)>; } -let Predicates = [PrefixInstrs] in { +let Predicates = [PrefixInstrs, HasP10Vector] in { def XXPERMX : 8RR_XX4Form_IMM3_XTABC6<34, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC, u3imm:$IMM), @@ -2142,7 +2165,7 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in { class xxevalPattern imm> : Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {} -let AddedComplexity = 400, Predicates = [PrefixInstrs] in { +let AddedComplexity = 400, Predicates = [PrefixInstrs, HasP10Vector] in { def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A, i32immNonAllOneNonZero:$A, i32immNonAllOneNonZero:$A, @@ -2279,7 +2302,7 @@ def : Pat<(f64 nzFPImmAsi64:$A), (PSTXSD (COPY_TO_REGCLASS $src, VFRC), PDForm:$dst)>; } -let Predicates = [PrefixInstrs] in { +let Predicates = [PrefixInstrs, HasP10Vector] in { def : Pat<(i32 imm34:$imm), (PLI (getImmAs64BitInt imm:$imm))>; def : Pat<(i64 imm34:$imm), (PLI8 (getImmAs64BitInt imm:$imm))>; def : 
Pat<(v16i8 (int_ppc_vsx_xxpermx v16i8:$A, v16i8:$B, v16i8:$C, timm:$D)), @@ -2300,7 +2323,9 @@ let Predicates = [PrefixInstrs] in { (XXBLENDVW $A, $B, $C)>; def : Pat<(int_ppc_vsx_xxblendvd v2i64:$A, v2i64:$B, v2i64:$C), (XXBLENDVD $A, $B, $C)>; +} +let Predicates = [PrefixInstrs] in { // Anonymous patterns to select prefixed loads and stores. // Load i32 def : Pat<(i32 (extloadi1 PDForm:$src)), (PLBZ memri34:$src)>; @@ -2335,7 +2360,9 @@ let Predicates = [PrefixInstrs] in { def : Pat<(truncstorei16 i64:$rS, PDForm:$dst), (PSTH8 g8rc:$rS, memri34:$dst)>; def : Pat<(truncstorei32 i64:$rS, PDForm:$dst), (PSTW8 g8rc:$rS, memri34:$dst)>; def : Pat<(store i64:$rS, PDForm:$dst), (PSTD g8rc:$rS, memri34:$dst)>; +} +let Predicates = [PrefixInstrs, HasFPU] in { // Load / Store f32 def : Pat<(f32 (load PDForm:$src)), (PLFS memri34:$src)>; def : Pat<(store f32:$FRS, PDForm:$dst), (PSTFS $FRS, memri34:$dst)>; @@ -2345,7 +2372,13 @@ let Predicates = [PrefixInstrs] in { (COPY_TO_REGCLASS (PLFS memri34:$src), VSFRC)>; def : Pat<(f64 (load PDForm:$src)), (PLFD memri34:$src)>; def : Pat<(store f64:$FRS, PDForm:$dst), (PSTFD $FRS, memri34:$dst)>; + // Prefixed fpext to v2f64 + def : Pat<(v4f32 (PPCldvsxlh PDForm:$src)), + (SUBREG_TO_REG (i64 1), (PLFD PDForm:$src), sub_64)>; +} + +let Predicates = [PrefixInstrs] in { // Atomic Load def : Pat<(i32 (atomic_load_8 PDForm:$src)), (PLBZ memri34:$src)>; def : Pat<(i32 (atomic_load_16 PDForm:$src)), (PLHZ memri34:$src)>; @@ -2357,10 +2390,6 @@ let Predicates = [PrefixInstrs] in { def : Pat<(atomic_store_16 i32:$RS, PDForm:$dst), (PSTH $RS, memri34:$dst)>; def : Pat<(atomic_store_32 i32:$RS, PDForm:$dst), (PSTW $RS, memri34:$dst)>; def : Pat<(atomic_store_64 i64:$RS, PDForm:$dst), (PSTD $RS, memri34:$dst)>; - - // Prefixed fpext to v2f64 - def : Pat<(v4f32 (PPCldvsxlh PDForm:$src)), - (SUBREG_TO_REG (i64 1), (PLFD PDForm:$src), sub_64)>; } def InsertEltShift { diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp 
b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 7e4cd6c72aa87..9e8da59615dfb 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -1695,7 +1695,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // transform it to the prefixed version so we don't have to use the XForm. if ((OpC == PPC::LXVP || OpC == PPC::STXVP) && (!isInt<16>(Offset) || (Offset % offsetMinAlign(MI)) != 0) && - Subtarget.hasPrefixInstrs()) { + Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) { unsigned NewOpc = OpC == PPC::LXVP ? PPC::PLXVP : PPC::PSTXVP; MI.setDesc(TII.get(NewOpc)); OpC = NewOpc; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index f0e5a7d393b6c..e99c6208594e3 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -3125,7 +3125,7 @@ lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, Chain = Unorder.getValue(1); Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL, DAG.getVTList(ContainerVT, MVT::Other), - {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL}); + {Chain, Src, Src, Src, Unorder, VL}); Chain = Src.getValue(1); // We do the conversion on the absolute value and fix the sign at the end. @@ -13704,6 +13704,44 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, return SDValue(); } +// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) -> +// (bitcast (sra (v2Xi16 (bitcast X)), 15)) +// Same for other equivalent types with other equivalent constants. +static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Do this for legal vectors unless they are i1 or i8 vectors. 
+ if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16) + return SDValue(); + + if (N->getOperand(0).getOpcode() != ISD::AND || + N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL) + return SDValue(); + + SDValue And = N->getOperand(0); + SDValue Srl = And.getOperand(0); + + APInt V1, V2, V3; + if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) || + !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) || + !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3)) + return SDValue(); + + unsigned HalfSize = VT.getScalarSizeInBits() / 2; + if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) || + V3 != (HalfSize - 1)) + return SDValue(); + + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), + EVT::getIntegerVT(*DAG.getContext(), HalfSize), + VT.getVectorElementCount() * 2); + SDLoc DL(N); + SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0)); + SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast, + DAG.getConstant(HalfSize - 1, DL, HalfVT)); + return DAG.getNode(ISD::BITCAST, DL, VT, Sra); +} static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, @@ -13748,6 +13786,9 @@ static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineBinOpOfZExt(N, DAG)) return V; + if (SDValue V = combineVectorMulToSraBitcast(N, DAG)) + return V; + return SDValue(); } @@ -16087,6 +16128,57 @@ static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, return true; } +static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) { + // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1)) + // This would be benefit for the cases where X and Y are both the same value + // type of low precision vectors. 
Since the truncate would be lowered into + // n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate + // restriction, such pattern would be expanded into a series of "vsetvli" + // and "vnsrl" instructions later to reach this point. + auto IsTruncNode = [](SDValue V) { + if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL) + return false; + SDValue VL = V.getOperand(2); + auto *C = dyn_cast<ConstantSDNode>(VL); + // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand + bool IsVLMAXForVMSET = (C && C->isAllOnes()) || + (isa<RegisterSDNode>(VL) && + cast<RegisterSDNode>(VL)->getReg() == RISCV::X0); + return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL && IsVLMAXForVMSET; + }; + + SDValue Op = N->getOperand(0); + + // We need to first find the inner level of TRUNCATE_VECTOR_VL node + // to distinguish such pattern. 
+ while (IsTruncNode(Op)) { + if (!Op.hasOneUse()) + return SDValue(); + Op = Op.getOperand(0); + } + + if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse()) + return SDValue(); + + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() || + N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse()) + return SDValue(); + + SDValue N00 = N0.getOperand(0); + SDValue N10 = N1.getOperand(0); + if (!N00.getValueType().isVector() || + N00.getValueType() != N10.getValueType() || + N->getValueType(0) != N10.getValueType()) + return SDValue(); + + unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1; + SDValue SMin = + DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10, + DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0))); + return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin); +} SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { @@ -16304,56 +16396,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, } } return SDValue(); - case RISCVISD::TRUNCATE_VECTOR_VL: { - // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 
1)) - // This would be benefit for the cases where X and Y are both the same value - // type of low precision vectors. Since the truncate would be lowered into - // n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate - // restriction, such pattern would be expanded into a series of "vsetvli" - // and "vnsrl" instructions later to reach this point. - auto IsTruncNode = [](SDValue V) { - if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL) - return false; - SDValue VL = V.getOperand(2); - auto *C = dyn_cast<ConstantSDNode>(VL); - // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand - bool IsVLMAXForVMSET = (C && C->isAllOnes()) || - (isa<RegisterSDNode>(VL) && - cast<RegisterSDNode>(VL)->getReg() == RISCV::X0); - return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL && - IsVLMAXForVMSET; - }; - - SDValue Op = N->getOperand(0); - - // We need to first find the inner level of TRUNCATE_VECTOR_VL node - // to distinguish such pattern. - while (IsTruncNode(Op)) { - if (!Op.hasOneUse()) - return SDValue(); - Op = Op.getOperand(0); - } - - if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) { - SDValue N0 = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); - if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && - N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) { - SDValue N00 = N0.getOperand(0); - SDValue N10 = N1.getOperand(0); - if (N00.getValueType().isVector() && - N00.getValueType() == N10.getValueType() && - N->getValueType(0) == N10.getValueType()) { - unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1; - SDValue SMin = DAG.getNode( - ISD::SMIN, SDLoc(N1), N->getValueType(0), N10, - DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0))); - return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin); - } - } - } - break; - } + case RISCVISD::TRUNCATE_VECTOR_VL: + return combineTruncOfSraSext(N, DAG); case ISD::TRUNCATE: return performTRUNCATECombine(N, DAG, Subtarget); case ISD::SELECT: diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp 
b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index c0b2a695b8ea4..2c0a807e44685 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -882,7 +882,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass { StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; } private: - bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require, + bool needVSETVLI(const DemandedFields &Used, const VSETVLIInfo &Require, const VSETVLIInfo &CurInfo) const; bool needVSETVLIPHI(const VSETVLIInfo &Require, const MachineBasicBlock &MBB) const; @@ -1175,17 +1175,13 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, } /// Return true if a VSETVLI is required to transition from CurInfo to Require -/// before MI. -bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI, +/// given a set of DemandedFields \p Used. +bool RISCVInsertVSETVLI::needVSETVLI(const DemandedFields &Used, const VSETVLIInfo &Require, const VSETVLIInfo &CurInfo) const { - assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, LIS)); - if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly()) return true; - DemandedFields Used = getDemanded(MI, ST); - if (CurInfo.isCompatible(Used, Require, LIS)) return false; @@ -1232,16 +1228,17 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, if (!RISCVII::hasSEWOp(TSFlags)) return; + DemandedFields Demanded = getDemanded(MI, ST); + const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, LIS); assert(NewInfo.isValid() && !NewInfo.isUnknown()); - if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info)) + if (Info.isValid() && !needVSETVLI(Demanded, NewInfo, Info)) return; const VSETVLIInfo PrevInfo = Info; if (!Info.isValid() || Info.isUnknown()) Info = NewInfo; - DemandedFields Demanded = getDemanded(MI, ST); const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded); // If MI only demands that VL has 
the same zeroness, we only need to set the diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index ce50fe6e2cbb0..a1b078910e29c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1311,6 +1311,26 @@ def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12), /// HI and ADD_LO address nodes. +// Pseudo for a rematerializable LUI+ADDI sequence for loading an address. +// It will be expanded after register allocation. +// FIXME: The scheduling information does not reflect the multiple instructions. +let Size = 8, isReMaterializable = 1 in +def PseudoMovAddr : Pseudo<(outs GPR:$dst), (ins uimm20_lui:$hi, simm12:$lo), []>, + Sched<[WriteIALU]>; + +def riscv_hi_oneuse : unop_oneuse; +def addr_hi_lo : PatFrag<(ops node:$hi, node:$lo), + (riscv_add_lo (riscv_hi_oneuse node:$hi), node:$lo)>; + +def : Pat<(addr_hi_lo tglobaladdr:$hi, tglobaladdr:$lo), + (PseudoMovAddr tglobaladdr:$hi, tglobaladdr:$lo)>; +def : Pat<(addr_hi_lo tblockaddress:$hi, tblockaddress:$lo), + (PseudoMovAddr tblockaddress:$hi, tblockaddress:$lo)>; +def : Pat<(addr_hi_lo tjumptable:$hi, tjumptable:$lo), + (PseudoMovAddr tjumptable:$hi, tjumptable:$lo)>; +def : Pat<(addr_hi_lo tconstpool:$hi, tconstpool:$lo), + (PseudoMovAddr tconstpool:$hi, tconstpool:$lo)>; + def : Pat<(riscv_hi tglobaladdr:$in), (LUI tglobaladdr:$in)>; def : Pat<(riscv_hi tblockaddress:$in), (LUI tblockaddress:$in)>; def : Pat<(riscv_hi tjumptable:$in), (LUI tjumptable:$in)>; diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp index 410989177a8b9..fecc83a821f42 100644 --- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp +++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp @@ -84,7 +84,8 @@ INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE, // 3) The offset value in the Global Address or Constant Pool is 0. 
bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi, MachineInstr *&Lo) { - if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC) + if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC && + Hi.getOpcode() != RISCV::PseudoMovAddr) return false; const MachineOperand &HiOp1 = Hi.getOperand(1); @@ -97,16 +98,22 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi, HiOp1.getOffset() != 0) return false; - Register HiDestReg = Hi.getOperand(0).getReg(); - if (!MRI->hasOneUse(HiDestReg)) - return false; + if (Hi.getOpcode() == RISCV::PseudoMovAddr) { + // Most of the code should handle it correctly without modification by + // setting Lo and Hi both point to PseudoMovAddr + Lo = &Hi; + } else { + Register HiDestReg = Hi.getOperand(0).getReg(); + if (!MRI->hasOneUse(HiDestReg)) + return false; - Lo = &*MRI->use_instr_begin(HiDestReg); - if (Lo->getOpcode() != RISCV::ADDI) - return false; + Lo = &*MRI->use_instr_begin(HiDestReg); + if (Lo->getOpcode() != RISCV::ADDI) + return false; + } const MachineOperand &LoOp2 = Lo->getOperand(2); - if (Hi.getOpcode() == RISCV::LUI) { + if (Hi.getOpcode() == RISCV::LUI || Hi.getOpcode() == RISCV::PseudoMovAddr) { if (LoOp2.getTargetFlags() != RISCVII::MO_LO || !(LoOp2.isGlobal() || LoOp2.isCPI() || LoOp2.isBlockAddress()) || LoOp2.getOffset() != 0) @@ -466,6 +473,13 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi, Hi.getOperand(1).setOffset(NewOffset); MachineOperand &ImmOp = Lo.getOperand(2); + // Expand PseudoMovAddr into LUI + if (Hi.getOpcode() == RISCV::PseudoMovAddr) { + auto *TII = ST->getInstrInfo(); + Hi.setDesc(TII->get(RISCV::LUI)); + Hi.removeOperand(2); + } + if (Hi.getOpcode() != RISCV::AUIPC) ImmOp.setOffset(NewOffset); @@ -501,6 +515,11 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi, } } + // Prevent Lo (originally PseudoMovAddr, which is also pointed by Hi) from + // being erased + if (&Lo == &Hi) + return true; + 
MRI->replaceRegWith(Lo.getOperand(0).getReg(), Hi.getOperand(0).getReg()); Lo.eraseFromParent(); return true; diff --git a/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp index 52f2ce27164d6..b7b0c47c084c6 100644 --- a/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp @@ -44,6 +44,7 @@ class RISCVPostRAExpandPseudo : public MachineFunctionPass { bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); bool expandMovImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); + bool expandMovAddr(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); }; char RISCVPostRAExpandPseudo::ID = 0; @@ -75,6 +76,8 @@ bool RISCVPostRAExpandPseudo::expandMI(MachineBasicBlock &MBB, switch (MBBI->getOpcode()) { case RISCV::PseudoMovImm: return expandMovImm(MBB, MBBI); + case RISCV::PseudoMovAddr: + return expandMovAddr(MBB, MBBI); default: return false; } @@ -101,6 +104,26 @@ bool RISCVPostRAExpandPseudo::expandMovImm(MachineBasicBlock &MBB, return true; } +bool RISCVPostRAExpandPseudo::expandMovAddr(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + DebugLoc DL = MBBI->getDebugLoc(); + + Register DstReg = MBBI->getOperand(0).getReg(); + bool DstIsDead = MBBI->getOperand(0).isDead(); + bool Renamable = MBBI->getOperand(0).isRenamable(); + + BuildMI(MBB, MBBI, DL, TII->get(RISCV::LUI)) + .addReg(DstReg, RegState::Define | getRenamableRegState(Renamable)) + .add(MBBI->getOperand(1)); + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead) | + getRenamableRegState(Renamable)) + .addReg(DstReg, RegState::Kill | getRenamableRegState(Renamable)) + .add(MBBI->getOperand(2)); + MBBI->eraseFromParent(); + return true; +} + } // end of anonymous namespace INITIALIZE_PASS(RISCVPostRAExpandPseudo, "riscv-expand-pseudolisimm32", diff 
--git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp index 5c286acdcc9b3..ff8759755e517 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp @@ -272,6 +272,13 @@ void SPIRVInstPrinter::printOpDecorate(const MCInst *MI, raw_ostream &O) { case Decoration::UserSemantic: printStringImm(MI, NumFixedOps, O); break; + case Decoration::HostAccessINTEL: + printOperand(MI, NumFixedOps, O); + if (NumFixedOps + 1 < MI->getNumOperands()) { + O << ' '; + printStringImm(MI, NumFixedOps + 1, O); + } + break; default: printRemainingVariableOps(MI, NumFixedOps, O, true); break; diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp index 7f531542544ab..75aa1823b11f2 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp @@ -30,6 +30,13 @@ static const std::map SPIRV::Extension::Extension::SPV_EXT_shader_atomic_float_min_max}, {"SPV_INTEL_arbitrary_precision_integers", SPIRV::Extension::Extension::SPV_INTEL_arbitrary_precision_integers}, + {"SPV_INTEL_cache_controls", + SPIRV::Extension::Extension::SPV_INTEL_cache_controls}, + {"SPV_INTEL_global_variable_fpga_decorations", + SPIRV::Extension::Extension:: + SPV_INTEL_global_variable_fpga_decorations}, + {"SPV_INTEL_global_variable_host_access", + SPIRV::Extension::Extension::SPV_INTEL_global_variable_host_access}, {"SPV_INTEL_optnone", SPIRV::Extension::Extension::SPV_INTEL_optnone}, {"SPV_INTEL_usm_storage_classes", SPIRV::Extension::Extension::SPV_INTEL_usm_storage_classes}, diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index ea53fe55e7ab5..ffbd1e17bad5e 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -181,6 +181,14 @@ static void setInsertPointSkippingPhis(IRBuilder<> 
&B, Instruction *I) { B.SetInsertPoint(I); } +static void setInsertPointAfterDef(IRBuilder<> &B, Instruction *I) { + B.SetCurrentDebugLocation(I->getDebugLoc()); + if (I->getType()->isVoidTy()) + B.SetInsertPoint(I->getNextNode()); + else + B.SetInsertPoint(*I->getInsertionPointAfterDef()); +} + static bool requireAssignType(Instruction *I) { IntrinsicInst *Intr = dyn_cast(I); if (Intr) { @@ -560,6 +568,7 @@ void SPIRVEmitIntrinsics::preprocessUndefs(IRBuilder<> &B) { while (!Worklist.empty()) { Instruction *I = Worklist.front(); + bool BPrepared = false; Worklist.pop(); for (auto &Op : I->operands()) { @@ -567,7 +576,10 @@ void SPIRVEmitIntrinsics::preprocessUndefs(IRBuilder<> &B) { if (!AggrUndef || !Op->getType()->isAggregateType()) continue; - B.SetInsertPoint(I); + if (!BPrepared) { + setInsertPointSkippingPhis(B, I); + BPrepared = true; + } auto *IntrUndef = B.CreateIntrinsic(Intrinsic::spv_undef, {}, {}); Worklist.push(IntrUndef); I->replaceUsesOfWith(Op, IntrUndef); @@ -584,6 +596,7 @@ void SPIRVEmitIntrinsics::preprocessCompositeConstants(IRBuilder<> &B) { while (!Worklist.empty()) { auto *I = Worklist.front(); + bool IsPhi = isa(I), BPrepared = false; assert(I); bool KeepInst = false; for (const auto &Op : I->operands()) { @@ -615,7 +628,11 @@ void SPIRVEmitIntrinsics::preprocessCompositeConstants(IRBuilder<> &B) { else for (auto &COp : AggrConst->operands()) Args.push_back(COp); - B.SetInsertPoint(I); + if (!BPrepared) { + IsPhi ? 
B.SetInsertPointPastAllocas(I->getParent()->getParent()) + : B.SetInsertPoint(I); + BPrepared = true; + } auto *CI = B.CreateIntrinsic(Intrinsic::spv_const_composite, {ResTy}, {Args}); Worklist.push(CI); @@ -1111,8 +1128,7 @@ void SPIRVEmitIntrinsics::insertAssignPtrTypeIntrs(Instruction *I, isa(I)) return; - setInsertPointSkippingPhis(B, I->getNextNode()); - + setInsertPointAfterDef(B, I); Type *ElemTy = deduceElementType(I); Constant *EltTyConst = UndefValue::get(ElemTy); unsigned AddressSpace = getPointerAddressSpace(I->getType()); @@ -1127,7 +1143,7 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I, reportFatalOnTokenType(I); Type *Ty = I->getType(); if (!Ty->isVoidTy() && !isPointerTy(Ty) && requireAssignType(I)) { - setInsertPointSkippingPhis(B, I->getNextNode()); + setInsertPointAfterDef(B, I); Type *TypeToAssign = Ty; if (auto *II = dyn_cast(I)) { if (II->getIntrinsicID() == Intrinsic::spv_const_composite || @@ -1149,7 +1165,7 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I, if (isa(Op) && Op->getType()->isAggregateType()) buildIntrWithMD(Intrinsic::spv_assign_type, {B.getInt32Ty()}, Op, UndefValue::get(B.getInt32Ty()), {}, B); - else if (!isa(Op)) // TODO: This case could be removed + else if (!isa(Op)) buildIntrWithMD(Intrinsic::spv_assign_type, {Op->getType()}, Op, Op, {}, B); } @@ -1159,7 +1175,7 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I, void SPIRVEmitIntrinsics::insertSpirvDecorations(Instruction *I, IRBuilder<> &B) { if (MDNode *MD = I->getMetadata("spirv.Decorations")) { - B.SetInsertPoint(I->getNextNode()); + setInsertPointAfterDef(B, I); B.CreateIntrinsic(Intrinsic::spv_assign_decoration, {I->getType()}, {I, MetadataAsValue::get(I->getContext(), MD)}); } @@ -1170,7 +1186,7 @@ void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I, auto *II = dyn_cast(I); if (II && II->getIntrinsicID() == Intrinsic::spv_const_composite && TrackConstants) { - B.SetInsertPoint(I->getNextNode()); + 
setInsertPointAfterDef(B, I); auto t = AggrConsts.find(I); assert(t != AggrConsts.end()); auto *NewOp = @@ -1179,6 +1195,7 @@ void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I, I->replaceAllUsesWith(NewOp); NewOp->setArgOperand(0, I); } + bool IsPhi = isa(I), BPrepared = false; for (const auto &Op : I->operands()) { if ((isa(Op) && Op->getType()->isVectorTy()) || isa(I) || isa(I)) @@ -1188,11 +1205,14 @@ void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I, if (II && ((II->getIntrinsicID() == Intrinsic::spv_gep && OpNo == 0) || (II->paramHasAttr(OpNo, Attribute::ImmArg)))) continue; - B.SetInsertPoint(I); + if (!BPrepared) { + IsPhi ? B.SetInsertPointPastAllocas(I->getParent()->getParent()) + : B.SetInsertPoint(I); + BPrepared = true; + } Value *OpTyVal = Op; if (Op->getType()->isTargetExtTy()) - OpTyVal = Constant::getNullValue( - IntegerType::get(I->getContext(), GR->getPointerSize())); + OpTyVal = PoisonValue::get(Op->getType()); auto *NewOp = buildIntrWithMD(Intrinsic::spv_track_constant, {Op->getType(), OpTyVal->getType()}, Op, OpTyVal, {}, B); @@ -1201,7 +1221,7 @@ void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I, } if (I->hasName()) { reportFatalOnTokenType(I); - setInsertPointSkippingPhis(B, I->getNextNode()); + setInsertPointAfterDef(B, I); std::vector Args = {I}; addStringImm(I->getName(), B, Args); B.CreateIntrinsic(Intrinsic::spv_assign_name, {I->getType()}, Args); @@ -1345,7 +1365,7 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) { for (auto *I : Worklist) { TrackConstants = true; if (!I->getType()->isVoidTy() || isa(I)) - B.SetInsertPoint(I->getNextNode()); + setInsertPointAfterDef(B, I); // Visitors return either the original/newly created instruction for further // processing, nullptr otherwise. 
I = visit(*I); diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index c86ab285f354f..61f99f8d85269 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -703,6 +703,15 @@ static void addOpDecorateReqs(const MachineInstr &MI, unsigned DecIndex, static_cast(LinkageOp); if (LnkType == SPIRV::LinkageType::LinkOnceODR) Reqs.addExtension(SPIRV::Extension::SPV_KHR_linkonce_odr); + } else if (Dec == SPIRV::Decoration::CacheControlLoadINTEL || + Dec == SPIRV::Decoration::CacheControlStoreINTEL) { + Reqs.addExtension(SPIRV::Extension::SPV_INTEL_cache_controls); + } else if (Dec == SPIRV::Decoration::HostAccessINTEL) { + Reqs.addExtension(SPIRV::Extension::SPV_INTEL_global_variable_host_access); + } else if (Dec == SPIRV::Decoration::InitModeINTEL || + Dec == SPIRV::Decoration::ImplementInRegisterMapINTEL) { + Reqs.addExtension( + SPIRV::Extension::SPV_INTEL_global_variable_fpga_decorations); } } diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp index 85299a49a6b94..624899600693a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp @@ -40,6 +40,7 @@ class SPIRVPreLegalizer : public MachineFunctionPass { static void addConstantsToTrack(MachineFunction &MF, SPIRVGlobalRegistry *GR, + const SPIRVSubtarget &STI, DenseMap &TargetExtConstTypes) { MachineRegisterInfo &MRI = MF.getRegInfo(); DenseMap RegsAlreadyAddedToDT; @@ -82,8 +83,17 @@ addConstantsToTrack(MachineFunction &MF, SPIRVGlobalRegistry *GR, if (Const->getType()->isTargetExtTy()) { // remember association so that we can restore it when assign types MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); - if (SrcMI && SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) + if (SrcMI && (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT || + SrcMI->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)) 
TargetExtConstTypes[SrcMI] = Const->getType(); + if (Const->isNullValue()) { + MachineIRBuilder MIB(MF); + SPIRVType *ExtType = + GR->getOrCreateSPIRVType(Const->getType(), MIB); + SrcMI->setDesc(STI.getInstrInfo()->get(SPIRV::OpConstantNull)); + SrcMI->addOperand(MachineOperand::CreateReg( + GR->getSPIRVTypeID(ExtType), false)); + } } } else { RegsAlreadyAddedToDT[&MI] = Reg; @@ -394,6 +404,7 @@ generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR, for (auto MII = std::prev(MBB->end()), Begin = MBB->begin(); !ReachedBegin;) { MachineInstr &MI = *MII; + unsigned MIOp = MI.getOpcode(); if (isSpvIntrinsic(MI, Intrinsic::spv_assign_ptr_type)) { Register Reg = MI.getOperand(1).getReg(); @@ -419,9 +430,9 @@ generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR, if (Def->getOpcode() != TargetOpcode::G_GLOBAL_VALUE) insertAssignInstr(Reg, Ty, nullptr, GR, MIB, MF.getRegInfo()); ToErase.push_back(&MI); - } else if (MI.getOpcode() == TargetOpcode::G_CONSTANT || - MI.getOpcode() == TargetOpcode::G_FCONSTANT || - MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR) { + } else if (MIOp == TargetOpcode::G_CONSTANT || + MIOp == TargetOpcode::G_FCONSTANT || + MIOp == TargetOpcode::G_BUILD_VECTOR) { // %rc = G_CONSTANT ty Val // ===> // %cty = OpType* ty @@ -435,15 +446,15 @@ generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR, continue; } Type *Ty = nullptr; - if (MI.getOpcode() == TargetOpcode::G_CONSTANT) { + if (MIOp == TargetOpcode::G_CONSTANT) { auto TargetExtIt = TargetExtConstTypes.find(&MI); Ty = TargetExtIt == TargetExtConstTypes.end() ? 
MI.getOperand(1).getCImm()->getType() : TargetExtIt->second; - } else if (MI.getOpcode() == TargetOpcode::G_FCONSTANT) { + } else if (MIOp == TargetOpcode::G_FCONSTANT) { Ty = MI.getOperand(1).getFPImm()->getType(); } else { - assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR); + assert(MIOp == TargetOpcode::G_BUILD_VECTOR); Type *ElemTy = nullptr; MachineInstr *ElemMI = MRI.getVRegDef(MI.getOperand(1).getReg()); assert(ElemMI); @@ -459,7 +470,7 @@ generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR, Ty = VectorType::get(ElemTy, NumElts, false); } insertAssignInstr(Reg, Ty, nullptr, GR, MIB, MRI); - } else if (MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE) { + } else if (MIOp == TargetOpcode::G_GLOBAL_VALUE) { propagateSPIRVType(&MI, GR, MRI, MIB); } @@ -802,7 +813,7 @@ bool SPIRVPreLegalizer::runOnMachineFunction(MachineFunction &MF) { MachineIRBuilder MIB(MF); // a registry of target extension constants DenseMap TargetExtConstTypes; - addConstantsToTrack(MF, GR, TargetExtConstTypes); + addConstantsToTrack(MF, GR, ST, TargetExtConstTypes); foldConstantsIntoIntrinsics(MF); insertBitcasts(MF, GR, MIB); generateAssignInstrs(MF, GR, MIB, TargetExtConstTypes); diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp index a8a0577f60564..7bee87d7204ed 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp @@ -22,6 +22,7 @@ #include "SPIRVSubtarget.h" #include "SPIRVTargetMachine.h" #include "SPIRVUtils.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" @@ -29,6 +30,8 @@ #include "llvm/IR/IntrinsicsSPIRV.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include +#include using namespace llvm; @@ -152,6 +155,132 @@ static bool lowerIntrinsicToFunction(IntrinsicInst *Intrinsic) { return 
true; } +static std::string getAnnotation(Value *AnnoVal, Value *OptAnnoVal) { + if (auto *Ref = dyn_cast_or_null(AnnoVal)) + AnnoVal = Ref->getOperand(0); + if (auto *Ref = dyn_cast_or_null(OptAnnoVal)) + OptAnnoVal = Ref->getOperand(0); + + std::string Anno; + if (auto *C = dyn_cast_or_null(AnnoVal)) { + StringRef Str; + if (getConstantStringInfo(C, Str)) + Anno = Str; + } + // handle optional annotation parameter in a way that Khronos Translator do + // (collect integers wrapped in a struct) + if (auto *C = dyn_cast_or_null(OptAnnoVal); + C && C->getNumOperands()) { + Value *MaybeStruct = C->getOperand(0); + if (auto *Struct = dyn_cast(MaybeStruct)) { + for (unsigned I = 0, E = Struct->getNumOperands(); I != E; ++I) { + if (auto *CInt = dyn_cast(Struct->getOperand(I))) + Anno += (I == 0 ? ": " : ", ") + + std::to_string(CInt->getType()->getIntegerBitWidth() == 1 + ? CInt->getZExtValue() + : CInt->getSExtValue()); + } + } else if (auto *Struct = dyn_cast(MaybeStruct)) { + // { i32 i32 ... } zeroinitializer + for (unsigned I = 0, E = Struct->getType()->getStructNumElements(); + I != E; ++I) + Anno += I == 0 ? 
": 0" : ", 0"; + } + } + return Anno; +} + +static SmallVector parseAnnotation(Value *I, + const std::string &Anno, + LLVMContext &Ctx, + Type *Int32Ty) { + // Try to parse the annotation string according to the following rules: + // annotation := ({kind} | {kind:value,value,...})+ + // kind := number + // value := number | string + static const std::regex R( + "\\{(\\d+)(?:[:,](\\d+|\"[^\"]*\")(?:,(\\d+|\"[^\"]*\"))*)?\\}"); + SmallVector MDs; + int Pos = 0; + for (std::sregex_iterator + It = std::sregex_iterator(Anno.begin(), Anno.end(), R), + ItEnd = std::sregex_iterator(); + It != ItEnd; ++It) { + if (It->position() != Pos) + return SmallVector{}; + Pos = It->position() + It->length(); + std::smatch Match = *It; + SmallVector MDsItem; + for (std::size_t i = 1; i < Match.size(); ++i) { + std::ssub_match SMatch = Match[i]; + std::string Item = SMatch.str(); + if (Item.length() == 0) + break; + if (Item[0] == '"') { + Item = Item.substr(1, Item.length() - 2); + // Acceptable format of the string snippet is: + static const std::regex RStr("^(\\d+)(?:,(\\d+))*$"); + if (std::smatch MatchStr; std::regex_match(Item, MatchStr, RStr)) { + for (std::size_t SubIdx = 1; SubIdx < MatchStr.size(); ++SubIdx) + if (std::string SubStr = MatchStr[SubIdx].str(); SubStr.length()) + MDsItem.push_back(ConstantAsMetadata::get( + ConstantInt::get(Int32Ty, std::stoi(SubStr)))); + } else { + MDsItem.push_back(MDString::get(Ctx, Item)); + } + } else if (int32_t Num; + std::from_chars(Item.data(), Item.data() + Item.size(), Num) + .ec == std::errc{}) { + MDsItem.push_back( + ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Num))); + } else { + MDsItem.push_back(MDString::get(Ctx, Item)); + } + } + if (MDsItem.size() == 0) + return SmallVector{}; + MDs.push_back(MDNode::get(Ctx, MDsItem)); + } + return Pos == static_cast(Anno.length()) ? 
MDs + : SmallVector{}; +} + +static void lowerPtrAnnotation(IntrinsicInst *II) { + LLVMContext &Ctx = II->getContext(); + Type *Int32Ty = Type::getInt32Ty(Ctx); + + // Retrieve an annotation string from arguments. + Value *PtrArg = nullptr; + if (auto *BI = dyn_cast(II->getArgOperand(0))) + PtrArg = BI->getOperand(0); + else + PtrArg = II->getOperand(0); + std::string Anno = + getAnnotation(II->getArgOperand(1), + 4 < II->arg_size() ? II->getArgOperand(4) : nullptr); + + // Parse the annotation. + SmallVector MDs = parseAnnotation(II, Anno, Ctx, Int32Ty); + + // If the annotation string is not parsed successfully we don't know the + // format used and output it as a general UserSemantic decoration. + // Otherwise MDs is a Metadata tuple (a decoration list) in the format + // expected by `spirv.Decorations`. + if (MDs.size() == 0) { + auto UserSemantic = ConstantAsMetadata::get(ConstantInt::get( + Int32Ty, static_cast(SPIRV::Decoration::UserSemantic))); + MDs.push_back(MDNode::get(Ctx, {UserSemantic, MDString::get(Ctx, Anno)})); + } + + // Build the internal intrinsic function. + IRBuilder<> IRB(II->getParent()); + IRB.SetInsertPoint(II); + IRB.CreateIntrinsic( + Intrinsic::spv_assign_decoration, {PtrArg->getType()}, + {PtrArg, MetadataAsValue::get(Ctx, MDNode::get(Ctx, MDs))}); + II->replaceAllUsesWith(II->getOperand(0)); +} + static void lowerFunnelShifts(IntrinsicInst *FSHIntrinsic) { // Get a separate function - otherwise, we'd have to rework the CFG of the // current one. 
Then simply replace the intrinsic uses with a call to the new @@ -334,6 +463,10 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) { Changed |= toSpvOverloadedIntrinsic( II, Intrinsic::SPVIntrinsics::spv_lifetime_end, {1}); break; + case Intrinsic::ptr_annotation: + lowerPtrAnnotation(II); + Changed = true; + break; } } } diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index 98cbd9d2c1f2e..65b48c8acf6ab 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -299,6 +299,9 @@ defm SPV_INTEL_function_pointers : ExtensionOperand<104>; defm SPV_INTEL_variable_length_array : ExtensionOperand<105>; defm SPV_INTEL_bfloat16_conversion : ExtensionOperand<106>; defm SPV_INTEL_inline_assembly : ExtensionOperand<107>; +defm SPV_INTEL_cache_controls : ExtensionOperand<108>; +defm SPV_INTEL_global_variable_host_access : ExtensionOperand<109>; +defm SPV_INTEL_global_variable_fpga_decorations : ExtensionOperand<110>; //===----------------------------------------------------------------------===// // Multiclass used to define Capabilities enum values and at the same time @@ -471,6 +474,10 @@ defm VariableLengthArrayINTEL : CapabilityOperand<5817, 0, 0, [SPV_INTEL_variabl defm GroupUniformArithmeticKHR : CapabilityOperand<6400, 0, 0, [SPV_KHR_uniform_group_instructions], []>; defm USMStorageClassesINTEL : CapabilityOperand<5935, 0, 0, [SPV_INTEL_usm_storage_classes], [Kernel]>; defm BFloat16ConversionINTEL : CapabilityOperand<6115, 0, 0, [SPV_INTEL_bfloat16_conversion], []>; +defm GlobalVariableHostAccessINTEL : CapabilityOperand<6187, 0, 0, [SPV_INTEL_global_variable_host_access], []>; +defm HostAccessINTEL : CapabilityOperand<6188, 0, 0, [SPV_INTEL_global_variable_host_access], []>; +defm GlobalVariableFPGADecorationsINTEL : CapabilityOperand<6189, 0, 0, [SPV_INTEL_global_variable_fpga_decorations], []>; +defm CacheControlsINTEL : 
CapabilityOperand<6441, 0, 0, [SPV_INTEL_cache_controls], []>; //===----------------------------------------------------------------------===// // Multiclass used to define SourceLanguage enum values and at the same time @@ -1206,6 +1213,11 @@ defm ReferencedIndirectlyINTEL : DecorationOperand<5602, 0, 0, [], [IndirectRefe defm ClobberINTEL : DecorationOperand<5607, 0, 0, [SPV_INTEL_inline_assembly], [AsmINTEL]>; defm SideEffectsINTEL : DecorationOperand<5608, 0, 0, [SPV_INTEL_inline_assembly], [AsmINTEL]>; defm ArgumentAttributeINTEL : DecorationOperand<6409, 0, 0, [], [FunctionPointersINTEL]>; +defm CacheControlLoadINTEL : DecorationOperand<6442, 0, 0, [], [CacheControlsINTEL]>; +defm CacheControlStoreINTEL : DecorationOperand<6443, 0, 0, [], [CacheControlsINTEL]>; +defm HostAccessINTEL : DecorationOperand<6188, 0, 0, [], [GlobalVariableHostAccessINTEL]>; +defm InitModeINTEL : DecorationOperand<6190, 0, 0, [], [GlobalVariableFPGADecorationsINTEL]>; +defm ImplementInRegisterMapINTEL : DecorationOperand<6191, 0, 0, [], [GlobalVariableFPGADecorationsINTEL]>; //===----------------------------------------------------------------------===// // Multiclass used to define BuiltIn enum values and at the same time diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index e8f58a19d25e3..71dfe1062956e 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -54,6 +54,13 @@ cl::opt // setjmp/longjmp handling using wasm EH instrutions cl::opt WebAssembly::WasmEnableSjLj( "wasm-enable-sjlj", cl::desc("WebAssembly setjmp/longjmp handling")); +// Whether we use the new exnref Wasm EH proposal adopted on Oct 2023. +// Should be used with -wasm-enable-eh. 
+// Currently set to false by default, but will later change to true and then +// later can be removed after the legacy WAsm EH instructions are removed. +cl::opt WebAssembly::WasmEnableExnref( + "wasm-enable-exnref", cl::desc("WebAssembly exception handling (exnref)"), + cl::init(false)); static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo & /*MRI*/, const Triple &TT, diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 34502170a5c71..7f1a5f616ed48 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -44,6 +44,7 @@ extern cl::opt WasmEnableEmEH; // asm.js-style EH extern cl::opt WasmEnableEmSjLj; // asm.js-style SjLJ extern cl::opt WasmEnableEH; // EH using Wasm EH instructions extern cl::opt WasmEnableSjLj; // SjLj using Wasm EH instructions +extern cl::opt WasmEnableExnref; // EH using new Wasm EH (exnref) enum OperandType { /// Basic block label in a branch construct. 
@@ -355,6 +356,8 @@ inline bool isArgument(unsigned Opc) { case WebAssembly::ARGUMENT_funcref_S: case WebAssembly::ARGUMENT_externref: case WebAssembly::ARGUMENT_externref_S: + case WebAssembly::ARGUMENT_exnref: + case WebAssembly::ARGUMENT_exnref_S: return true; default: return false; @@ -377,6 +380,8 @@ inline bool isCopy(unsigned Opc) { case WebAssembly::COPY_FUNCREF_S: case WebAssembly::COPY_EXTERNREF: case WebAssembly::COPY_EXTERNREF_S: + case WebAssembly::COPY_EXNREF: + case WebAssembly::COPY_EXNREF_S: return true; default: return false; @@ -399,6 +404,8 @@ inline bool isTee(unsigned Opc) { case WebAssembly::TEE_FUNCREF_S: case WebAssembly::TEE_EXTERNREF: case WebAssembly::TEE_EXTERNREF_S: + case WebAssembly::TEE_EXNREF: + case WebAssembly::TEE_EXNREF_S: return true; default: return false; @@ -489,6 +496,8 @@ inline bool isLocalGet(unsigned Opc) { case WebAssembly::LOCAL_GET_FUNCREF_S: case WebAssembly::LOCAL_GET_EXTERNREF: case WebAssembly::LOCAL_GET_EXTERNREF_S: + case WebAssembly::LOCAL_GET_EXNREF: + case WebAssembly::LOCAL_GET_EXNREF_S: return true; default: return false; @@ -511,6 +520,8 @@ inline bool isLocalSet(unsigned Opc) { case WebAssembly::LOCAL_SET_FUNCREF_S: case WebAssembly::LOCAL_SET_EXTERNREF: case WebAssembly::LOCAL_SET_EXTERNREF_S: + case WebAssembly::LOCAL_SET_EXNREF: + case WebAssembly::LOCAL_SET_EXNREF_S: return true; default: return false; @@ -533,6 +544,8 @@ inline bool isLocalTee(unsigned Opc) { case WebAssembly::LOCAL_TEE_FUNCREF_S: case WebAssembly::LOCAL_TEE_EXTERNREF: case WebAssembly::LOCAL_TEE_EXTERNREF_S: + case WebAssembly::LOCAL_TEE_EXNREF: + case WebAssembly::LOCAL_TEE_EXNREF_S: return true; default: return false; diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp index 8ea02bd2ad1ff..d9c8e22bbbaf5 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp +++ 
b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp @@ -27,6 +27,7 @@ std::optional WebAssembly::parseType(StringRef Type) { wasm::ValType::V128) .Case("funcref", wasm::ValType::FUNCREF) .Case("externref", wasm::ValType::EXTERNREF) + .Case("exnref", wasm::ValType::EXNREF) .Default(std::nullopt); } @@ -40,6 +41,7 @@ WebAssembly::BlockType WebAssembly::parseBlockType(StringRef Type) { .Case("v128", WebAssembly::BlockType::V128) .Case("funcref", WebAssembly::BlockType::Funcref) .Case("externref", WebAssembly::BlockType::Externref) + .Case("exnref", WebAssembly::BlockType::Exnref) .Case("void", WebAssembly::BlockType::Void) .Default(WebAssembly::BlockType::Invalid); } @@ -62,6 +64,8 @@ const char *WebAssembly::anyTypeToString(unsigned Type) { return "funcref"; case wasm::WASM_TYPE_EXTERNREF: return "externref"; + case wasm::WASM_TYPE_EXNREF: + return "exnref"; case wasm::WASM_TYPE_FUNC: return "func"; case wasm::WASM_TYPE_NORESULT: @@ -110,6 +114,8 @@ wasm::ValType WebAssembly::regClassToValType(unsigned RC) { return wasm::ValType::FUNCREF; case WebAssembly::EXTERNREFRegClassID: return wasm::ValType::EXTERNREF; + case WebAssembly::EXNREFRegClassID: + return wasm::ValType::EXNREF; default: llvm_unreachable("unexpected type"); } diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.h index 486cf264d13e2..063ee4dba9068 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.h @@ -32,6 +32,7 @@ enum class BlockType : unsigned { V128 = unsigned(wasm::ValType::V128), Externref = unsigned(wasm::ValType::EXTERNREF), Funcref = unsigned(wasm::ValType::FUNCREF), + Exnref = unsigned(wasm::ValType::EXNREF), // Multivalue blocks (and other non-void blocks) are only emitted when the // blocks will never be exited and are at the ends of functions (see // 
WebAssemblyCFGStackify::fixEndsAtEndOfFunction). They also are never made @@ -41,7 +42,8 @@ enum class BlockType : unsigned { }; inline bool isRefType(wasm::ValType Type) { - return Type == wasm::ValType::EXTERNREF || Type == wasm::ValType::FUNCREF; + return Type == wasm::ValType::EXTERNREF || Type == wasm::ValType::FUNCREF || + Type == wasm::ValType::EXNREF; } // Convert ValType or a list/signature of ValTypes to a string. diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp index 867953b4e8d71..f9293460e701a 100644 --- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp @@ -33,6 +33,7 @@ MVT WebAssembly::parseMVT(StringRef Type) { .Case("v2i64", MVT::v2i64) .Case("funcref", MVT::funcref) .Case("externref", MVT::externref) + .Case("exnref", MVT::exnref) .Default(MVT::INVALID_SIMPLE_VALUE_TYPE); } @@ -58,6 +59,8 @@ wasm::ValType WebAssembly::toValType(MVT Type) { return wasm::ValType::FUNCREF; case MVT::externref: return wasm::ValType::EXTERNREF; + case MVT::exnref: + return wasm::ValType::EXNREF; default: llvm_unreachable("unexpected type"); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 443558537da24..0b7ec6e74cab2 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -125,6 +125,8 @@ static char getInvokeSig(wasm::ValType VT) { return 'F'; case wasm::ValType::EXTERNREF: return 'X'; + case wasm::ValType::EXNREF: + return 'E'; default: llvm_unreachable("Unhandled wasm::ValType enum"); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp index 0159c44a79b76..3c6a29311a10e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp +++ 
b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp @@ -100,6 +100,8 @@ static unsigned getDropOpcode(const TargetRegisterClass *RC) { return WebAssembly::DROP_FUNCREF; if (RC == &WebAssembly::EXTERNREFRegClass) return WebAssembly::DROP_EXTERNREF; + if (RC == &WebAssembly::EXNREFRegClass) + return WebAssembly::DROP_EXNREF; llvm_unreachable("Unexpected register class"); } @@ -119,6 +121,8 @@ static unsigned getLocalGetOpcode(const TargetRegisterClass *RC) { return WebAssembly::LOCAL_GET_FUNCREF; if (RC == &WebAssembly::EXTERNREFRegClass) return WebAssembly::LOCAL_GET_EXTERNREF; + if (RC == &WebAssembly::EXNREFRegClass) + return WebAssembly::LOCAL_GET_EXNREF; llvm_unreachable("Unexpected register class"); } @@ -138,6 +142,8 @@ static unsigned getLocalSetOpcode(const TargetRegisterClass *RC) { return WebAssembly::LOCAL_SET_FUNCREF; if (RC == &WebAssembly::EXTERNREFRegClass) return WebAssembly::LOCAL_SET_EXTERNREF; + if (RC == &WebAssembly::EXNREFRegClass) + return WebAssembly::LOCAL_SET_EXNREF; llvm_unreachable("Unexpected register class"); } @@ -157,6 +163,8 @@ static unsigned getLocalTeeOpcode(const TargetRegisterClass *RC) { return WebAssembly::LOCAL_TEE_FUNCREF; if (RC == &WebAssembly::EXTERNREFRegClass) return WebAssembly::LOCAL_TEE_EXTERNREF; + if (RC == &WebAssembly::EXNREFRegClass) + return WebAssembly::LOCAL_TEE_EXNREF; llvm_unreachable("Unexpected register class"); } @@ -176,6 +184,8 @@ static MVT typeForRegClass(const TargetRegisterClass *RC) { return MVT::funcref; if (RC == &WebAssembly::EXTERNREFRegClass) return MVT::externref; + if (RC == &WebAssembly::EXNREFRegClass) + return MVT::exnref; llvm_unreachable("unrecognized register class"); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 26e13948bc9a6..aa3aa1b007a53 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -137,6 +137,10 @@ class 
WebAssemblyFastISel final : public FastISel { if (Subtarget->hasReferenceTypes()) return VT; break; + case MVT::exnref: + if (Subtarget->hasReferenceTypes() && Subtarget->hasExceptionHandling()) + return VT; + break; case MVT::f16: return MVT::f32; case MVT::v16i8: @@ -717,6 +721,10 @@ bool WebAssemblyFastISel::fastLowerArguments() { Opc = WebAssembly::ARGUMENT_externref; RC = &WebAssembly::EXTERNREFRegClass; break; + case MVT::exnref: + Opc = WebAssembly::ARGUMENT_exnref; + RC = &WebAssembly::EXNREFRegClass; + break; default: return false; } @@ -821,6 +829,9 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { case MVT::externref: ResultReg = createResultReg(&WebAssembly::EXTERNREFRegClass); break; + case MVT::exnref: + ResultReg = createResultReg(&WebAssembly::EXNREFRegClass); + break; default: return false; } @@ -948,6 +959,10 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) { Opc = WebAssembly::SELECT_EXTERNREF; RC = &WebAssembly::EXTERNREFRegClass; break; + case MVT::exnref: + Opc = WebAssembly::SELECT_EXNREF; + RC = &WebAssembly::EXNREFRegClass; + break; default: return false; } @@ -1355,6 +1370,7 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) { case MVT::v2f64: case MVT::funcref: case MVT::externref: + case MVT::exnref: break; default: return false; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 518b6932a0c87..4beab9d091581 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -76,6 +76,9 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( if (Subtarget->hasReferenceTypes()) { addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass); addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass); + if (Subtarget->hasExceptionHandling()) { + addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass); + } } // Compute derived properties from 
the register classes. computeRegisterProperties(Subtarget->getRegisterInfo()); @@ -142,6 +145,11 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setTruncStoreAction(T, MVT::f16, Expand); } + if (Subtarget->hasHalfPrecision()) { + setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal); + setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal); + } + // Expand unavailable integer operations. for (auto Op : {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index c1a5a45395e87..3d37eb2fa27bc 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -292,6 +292,7 @@ defm "": ARGUMENT; defm "": ARGUMENT; defm "": ARGUMENT; defm "": ARGUMENT; +defm "": ARGUMENT; // local.get and local.set are not generated by instruction selection; they // are implied by virtual register uses and defs. @@ -375,6 +376,8 @@ defm "" : LOCAL; defm "" : LOCAL, Requires<[HasSIMD128]>; defm "" : LOCAL, Requires<[HasReferenceTypes]>; defm "" : LOCAL, Requires<[HasReferenceTypes]>; +defm "" : LOCAL, + Requires<[HasReferenceTypes, HasExceptionHandling]>; let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 in { defm CONST_I32 : I<(outs I32:$res), (ins i32imm_op:$imm), diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td index 608963d588635..2654a09387fd4 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td @@ -17,8 +17,9 @@ multiclass REF_I { [(set rc:$dst, (!cast("int_wasm_ref_null_" # ht)))], "ref.null_" # ht # "$dst", "ref.null_" # ht, - !cond(!eq(ht, "func") : 0xd070, - !eq(ht, "extern") : 0xd06f)>, + !cond(!eq(ht, "func") : 0xd070, + !eq(ht, "extern") : 0xd06f, + !eq(ht, "exn") : 0xd069)>, Requires<[HasReferenceTypes]>; defm SELECT_#rc: I<(outs 
rc:$dst), (ins rc:$lhs, rc:$rhs, I32:$cond), (outs), (ins), @@ -37,8 +38,9 @@ multiclass REF_I { defm "" : REF_I; defm "" : REF_I; +defm "" : REF_I; -foreach rc = [FUNCREF, EXTERNREF] in { +foreach rc = [FUNCREF, EXTERNREF, EXNREF] in { def : Pat<(select (i32 (setne I32:$cond, 0)), rc:$lhs, rc:$rhs), (!cast("SELECT_"#rc) rc:$lhs, rc:$rhs, I32:$cond)>; def : Pat<(select (i32 (seteq I32:$cond, 0)), rc:$lhs, rc:$rhs), diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 558e3d859dcd8..baf15ccdbe9ed 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -16,33 +16,34 @@ multiclass ABSTRACT_SIMD_I pattern_r, string asmstr_r, string asmstr_s, bits<32> simdop, - Predicate simd_level> { + list reqs> { defm "" : I, - Requires<[simd_level]>; + Requires; } multiclass SIMD_I pattern_r, string asmstr_r = "", - string asmstr_s = "", bits<32> simdop = -1> { + string asmstr_s = "", bits<32> simdop = -1, + list reqs = []> { defm "" : ABSTRACT_SIMD_I; + asmstr_s, simdop, !listconcat([HasSIMD128], reqs)>; } multiclass RELAXED_I pattern_r, string asmstr_r = "", string asmstr_s = "", bits<32> simdop = -1> { defm "" : ABSTRACT_SIMD_I; + asmstr_s, simdop, [HasRelaxedSIMD]>; } multiclass HALF_PRECISION_I pattern_r, string asmstr_r = "", string asmstr_s = "", bits<32> simdop = -1> { defm "" : ABSTRACT_SIMD_I; + asmstr_s, simdop, [HasHalfPrecision]>; } @@ -152,6 +153,19 @@ def F64x2 : Vec { let prefix = "f64x2"; } +def F16x8 : Vec { + let vt = v8f16; + let int_vt = v8i16; + let lane_vt = f32; + let lane_rc = F32; + let lane_bits = 16; + let lane_idx = LaneIdx8; + let lane_load = int_wasm_loadf16_f32; + let splat = PatFrag<(ops node:$x), (v8f16 (splat_vector (f16 $x)))>; + let prefix = "f16x8"; +} + +// TODO: Include F16x8 here when half precision is better supported. 
defvar AllVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2]; defvar IntVecs = [I8x16, I16x8, I32x4, I64x2]; @@ -781,13 +795,19 @@ def : Pat<(v2i64 (nodes[0] (v2f64 V128:$lhs), (v2f64 V128:$rhs))), // Bitwise operations //===----------------------------------------------------------------------===// -multiclass SIMDBinary simdop> { +multiclass SIMDBinary simdop, list reqs = []> { defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins), [(set (vec.vt V128:$dst), (node (vec.vt V128:$lhs), (vec.vt V128:$rhs)))], vec.prefix#"."#name#"\t$dst, $lhs, $rhs", - vec.prefix#"."#name, simdop>; + vec.prefix#"."#name, simdop, reqs>; +} + +multiclass HalfPrecisionBinary simdop> { + defm "" : SIMDBinary; } multiclass SIMDBitwise simdop, @@ -1199,6 +1219,7 @@ def : Pat<(v2f64 (froundeven (v2f64 V128:$src))), (NEAREST_F64x2 V128:$src)>; multiclass SIMDBinaryFP baseInst> { defm "" : SIMDBinary; defm "" : SIMDBinary; + defm "" : HalfPrecisionBinary; } // Addition: add @@ -1242,7 +1263,7 @@ defm PMAX : SIMDBinaryFP; // Also match the pmin/pmax cases where the operands are int vectors (but the // comparison is still a floating point comparison). This can happen when using // the wasm_simd128.h intrinsics because v128_t is an integer vector. 
-foreach vec = [F32x4, F64x2] in { +foreach vec = [F32x4, F64x2, F16x8] in { defvar pmin = !cast("PMIN_"#vec); defvar pmax = !cast("PMAX_"#vec); def : Pat<(vec.int_vt (vselect @@ -1266,6 +1287,10 @@ def : Pat<(v2f64 (int_wasm_pmin (v2f64 V128:$lhs), (v2f64 V128:$rhs))), (PMIN_F64x2 V128:$lhs, V128:$rhs)>; def : Pat<(v2f64 (int_wasm_pmax (v2f64 V128:$lhs), (v2f64 V128:$rhs))), (PMAX_F64x2 V128:$lhs, V128:$rhs)>; +def : Pat<(v8f16 (int_wasm_pmin (v8f16 V128:$lhs), (v8f16 V128:$rhs))), + (PMIN_F16x8 V128:$lhs, V128:$rhs)>; +def : Pat<(v8f16 (int_wasm_pmax (v8f16 V128:$lhs), (v8f16 V128:$rhs))), + (PMAX_F16x8 V128:$lhs, V128:$rhs)>; //===----------------------------------------------------------------------===// // Conversions diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td index 069ce5e3bc94a..02f0ab8577c3d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td @@ -64,6 +64,8 @@ multiclass TABLE { defm "" : TABLE, Requires<[HasReferenceTypes]>; defm "" : TABLE, Requires<[HasReferenceTypes]>; +defm "" : TABLE, + Requires<[HasReferenceTypes, HasExceptionHandling]>; def : Pat<(WebAssemblyTableSet mcsym:$table, i32:$idx, funcref:$r), (TABLE_SET_FUNCREF mcsym:$table, i32:$idx, funcref:$r)>, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index ef174e1716ef1..d4edb6bf18d93 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -504,6 +504,8 @@ static unsigned getTeeOpcode(const TargetRegisterClass *RC) { return WebAssembly::TEE_EXTERNREF; if (RC == &WebAssembly::FUNCREFRegClass) return WebAssembly::TEE_FUNCREF; + if (RC == &WebAssembly::EXNREFRegClass) + return WebAssembly::TEE_EXNREF; llvm_unreachable("Unexpected register class"); } diff --git 
a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td index 4e2faa608be07..17889dacc868c 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td @@ -45,6 +45,7 @@ def V128_0: WebAssemblyReg<"%v128">; def FUNCREF_0 : WebAssemblyReg<"%funcref.0">; def EXTERNREF_0 : WebAssemblyReg<"%externref.0">; +def EXNREF_0 : WebAssemblyReg<"%exnref.0">; // The value stack "register". This is an opaque entity which serves to order // uses and defs that must remain in LIFO order. @@ -68,3 +69,4 @@ def V128 : WebAssemblyRegClass<[v8f16, v4f32, v2f64, v2i64, v4i32, v16i8, 128, (add V128_0)>; def FUNCREF : WebAssemblyRegClass<[funcref], 0, (add FUNCREF_0)>; def EXTERNREF : WebAssemblyRegClass<[externref], 0, (add EXTERNREF_0)>; +def EXNREF : WebAssemblyRegClass<[exnref], 0, (add EXNREF_0)>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index de342e8965736..fd92a35c2638a 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -385,18 +385,36 @@ FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) { using WebAssembly::WasmEnableEH; using WebAssembly::WasmEnableEmEH; using WebAssembly::WasmEnableEmSjLj; +using WebAssembly::WasmEnableExnref; using WebAssembly::WasmEnableSjLj; static void basicCheckForEHAndSjLj(TargetMachine *TM) { - // Before checking, we make sure TargetOptions.ExceptionModel is the same as + + // You can't enable two modes of EH at the same time + if (WasmEnableEmEH && WasmEnableEH) + report_fatal_error( + "-enable-emscripten-cxx-exceptions not allowed with -wasm-enable-eh"); + // You can't enable two modes of SjLj at the same time + if (WasmEnableEmSjLj && WasmEnableSjLj) + report_fatal_error( + "-enable-emscripten-sjlj not allowed with -wasm-enable-sjlj"); + 
// You can't mix Emscripten EH with Wasm SjLj. + if (WasmEnableEmEH && WasmEnableSjLj) + report_fatal_error( + "-enable-emscripten-cxx-exceptions not allowed with -wasm-enable-sjlj"); + if (WasmEnableExnref && !WasmEnableEH) + report_fatal_error( + "-wasm-enable-exnref should be used with -wasm-enable-eh"); + + // Here we make sure TargetOptions.ExceptionModel is the same as // MCAsmInfo.ExceptionsType. Normally these have to be the same, because clang // stores the exception model info in LangOptions, which is later transferred // to TargetOptions and MCAsmInfo. But when clang compiles bitcode directly, // clang's LangOptions is not used and thus the exception model info is not // correctly transferred to TargetOptions and MCAsmInfo, so we make sure we - // have the correct exception model in WebAssemblyMCAsmInfo constructor. - // But in this case TargetOptions is still not updated, so we make sure they - // are the same. + // have the correct exception model in WebAssemblyMCAsmInfo constructor. But + // in this case TargetOptions is still not updated, so we make sure they are + // the same. TM->Options.ExceptionModel = TM->getMCAsmInfo()->getExceptionHandlingType(); // Basic Correctness checking related to -exception-model @@ -418,18 +436,6 @@ static void basicCheckForEHAndSjLj(TargetMachine *TM) { "-exception-model=wasm only allowed with at least one of " "-wasm-enable-eh or -wasm-enable-sjlj"); - // You can't enable two modes of EH at the same time - if (WasmEnableEmEH && WasmEnableEH) - report_fatal_error( - "-enable-emscripten-cxx-exceptions not allowed with -wasm-enable-eh"); - // You can't enable two modes of SjLj at the same time - if (WasmEnableEmSjLj && WasmEnableSjLj) - report_fatal_error( - "-enable-emscripten-sjlj not allowed with -wasm-enable-sjlj"); - // You can't mix Emscripten EH with Wasm SjLj. 
- if (WasmEnableEmEH && WasmEnableSjLj) - report_fatal_error( - "-enable-emscripten-cxx-exceptions not allowed with -wasm-enable-sjlj"); // Currently it is allowed to mix Wasm EH with Emscripten SjLj as an interim // measure, but some code will error out at compile time in this combination. // See WebAssemblyLowerEmscriptenEHSjLj pass for details. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp index 60e872549f87d..5e7279808cce6 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp @@ -175,6 +175,8 @@ unsigned WebAssembly::getCopyOpcodeForRegClass(const TargetRegisterClass *RC) { return WebAssembly::COPY_FUNCREF; case WebAssembly::EXTERNREFRegClassID: return WebAssembly::COPY_EXTERNREF; + case WebAssembly::EXNREFRegClassID: + return WebAssembly::COPY_EXNREF; default: llvm_unreachable("Unexpected register class"); } diff --git a/llvm/lib/Target/X86/X86CompressEVEX.cpp b/llvm/lib/Target/X86/X86CompressEVEX.cpp index 6442cc2193308..11b2155e3f985 100644 --- a/llvm/lib/Target/X86/X86CompressEVEX.cpp +++ b/llvm/lib/Target/X86/X86CompressEVEX.cpp @@ -14,6 +14,7 @@ // b. Promoted instruction (EVEX) -> pre-promotion instruction (legacy/VEX) // c. NDD (EVEX) -> non-NDD (legacy) // d. NF_ND (EVEX) -> NF (EVEX) +// e. NonNF (EVEX) -> NF (EVEX) // // Compression a, b and c can always reduce code size, with some exceptions // such as promoted 16-bit CRC32 which is as long as the legacy version. @@ -30,6 +31,9 @@ // // Compression d can help hardware decode (HW may skip reading the NDD // register) although the instruction length remains unchanged. +// +// Compression e can help hardware skip updating EFLAGS although the instruction +// length remains unchanged. 
//===----------------------------------------------------------------------===// #include "MCTargetDesc/X86BaseInfo.h" @@ -177,7 +181,8 @@ static bool isRedundantNewDataDest(MachineInstr &MI, const X86Subtarget &ST) { const MCInstrDesc &Desc = MI.getDesc(); Register Reg0 = MI.getOperand(0).getReg(); const MachineOperand &Op1 = MI.getOperand(1); - if (!Op1.isReg() || X86::getFirstAddrOperandIdx(MI) == 1) + if (!Op1.isReg() || X86::getFirstAddrOperandIdx(MI) == 1 || + X86::isCFCMOVCC(MI.getOpcode())) return false; Register Reg1 = Op1.getReg(); if (Reg1 == Reg0) @@ -219,25 +224,36 @@ static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) { return false; // MOVBE*rr is special because it has semantic of NDD but not set EVEX_B. bool IsNDLike = IsND || Opc == X86::MOVBE32rr || Opc == X86::MOVBE64rr; - if (IsNDLike && !isRedundantNewDataDest(MI, ST)) + bool IsRedundantNDD = IsNDLike ? isRedundantNewDataDest(MI, ST) : false; + // NonNF -> NF only if it's not a compressible NDD instruction and eflags is + // dead. + unsigned NFOpc = (ST.hasNF() && !IsRedundantNDD && + MI.registerDefIsDead(X86::EFLAGS, /*TRI=*/nullptr)) + ? 
X86::getNFVariant(Opc) + : 0U; + if (IsNDLike && !IsRedundantNDD && !NFOpc) return false; - ArrayRef Table = ArrayRef(X86CompressEVEXTable); - - Opc = MI.getOpcode(); - const auto *I = llvm::lower_bound(Table, Opc); - if (I == Table.end() || I->OldOpc != Opc) { - assert(!IsNDLike && "Missing entry for ND-like instruction"); - return false; - } + unsigned NewOpc = NFOpc; + if (!NewOpc) { + ArrayRef Table = ArrayRef(X86CompressEVEXTable); - if (!IsNDLike) { - if (usesExtendedRegister(MI) || !checkPredicate(I->NewOpc, &ST) || - !performCustomAdjustments(MI, I->NewOpc)) + Opc = MI.getOpcode(); + const auto I = llvm::lower_bound(Table, Opc); + if (I == Table.end() || I->OldOpc != Opc) { + assert(!IsNDLike && "Missing entry for ND-like instruction"); return false; + } + + if (!IsNDLike) { + if (usesExtendedRegister(MI) || !checkPredicate(I->NewOpc, &ST) || + !performCustomAdjustments(MI, I->NewOpc)) + return false; + } + NewOpc = I->NewOpc; } - const MCInstrDesc &NewDesc = ST.getInstrInfo()->get(I->NewOpc); + const MCInstrDesc &NewDesc = ST.getInstrInfo()->get(NewOpc); MI.setDesc(NewDesc); unsigned AsmComment; switch (NewDesc.TSFlags & X86II::EncodingMask) { @@ -256,7 +272,7 @@ static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) { llvm_unreachable("Unknown EVEX compression"); } MI.setAsmPrinterFlag(AsmComment); - if (IsNDLike) + if (IsRedundantNDD) MI.tieOperands(0, 1); return true; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 7d05f950b6fe9..3e391da807889 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3221,6 +3221,14 @@ int X86::getCCMPCondFlagsFromCondCode(X86::CondCode CC) { } } +#define GET_X86_NF_TRANSFORM_TABLE +#include "X86GenInstrMapping.inc" +unsigned X86::getNFVariant(unsigned Opc) { + ArrayRef Table = ArrayRef(X86NFTransformTable); + const auto I = llvm::lower_bound(Table, Opc); + return (I == Table.end() || I->OldOpc != Opc) ? 
0U : I->NewOpc; +} + /// Return the inverse of the specified condition, /// e.g. turning COND_E to COND_NE. X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 295fac60c6e40..9eb2bd56b2ab5 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -77,6 +77,9 @@ CondCode getCondFromCCMP(const MachineInstr &MI); // Turn condition code into condition flags for CCMP/CTEST. int getCCMPCondFlagsFromCondCode(CondCode CC); +// Get the opcode of corresponding NF variant. +unsigned getNFVariant(unsigned Opc); + /// GetOppositeBranchCondition - Return the inverse of the specified cond, /// e.g. turning COND_E to COND_NE. CondCode GetOppositeBranchCondition(CondCode CC); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index d935be7669f05..3b18e39d784b2 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -6257,7 +6257,8 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512( AddressSpace, CostKind); unsigned VF = VecTy->getNumElements() / Factor; - MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF); + MVT VT = + MVT::getVectorVT(TLI->getSimpleValueType(DL, VecTy->getScalarType()), VF); InstructionCost MaskCost; if (UseMaskedMemOp) { diff --git a/llvm/lib/TargetParser/AArch64TargetParser.cpp b/llvm/lib/TargetParser/AArch64TargetParser.cpp index c10b4be4eded9..ca356ec82bf1f 100644 --- a/llvm/lib/TargetParser/AArch64TargetParser.cpp +++ b/llvm/lib/TargetParser/AArch64TargetParser.cpp @@ -181,12 +181,6 @@ void AArch64::ExtensionSet::enable(ArchExtKind E) { !BaseArch->is_superset(ARMV9A)) enable(AEK_FP16FML); - // For all architectures, +crypto enables +aes and +sha2. - if (E == AEK_CRYPTO) { - enable(AEK_AES); - enable(AEK_SHA2); - } - // For v8.4A+ and v9.0A+, +crypto also enables +sha3 and +sm4. 
if (E == AEK_CRYPTO && BaseArch->is_superset(ARMV8_4A)) { enable(AEK_SHA3); diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 38b8dab984db3..8e829a53aeca2 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -2756,12 +2756,11 @@ static void sinkSpillUsesAfterCoroBegin(Function &F, /// after the suspend block. Doing so minimizes the lifetime of each variable, /// hence minimizing the amount of data we end up putting on the frame. static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape, - SuspendCrossingInfo &Checker) { + SuspendCrossingInfo &Checker, + const DominatorTree &DT) { if (F.hasOptNone()) return; - DominatorTree DT(F); - // Collect all possible basic blocks which may dominate all uses of allocas. SmallPtrSet DomSet; DomSet.insert(&F.getEntryBlock()); @@ -3149,12 +3148,13 @@ void coro::buildCoroutineFrame( doRematerializations(F, Checker, MaterializableCallback); + const DominatorTree DT(F); FrameDataInfo FrameData; SmallVector LocalAllocas; SmallVector DeadInstructions; if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon && Shape.ABI != coro::ABI::RetconOnce) - sinkLifetimeStartMarkers(F, Shape, Checker); + sinkLifetimeStartMarkers(F, Shape, Checker, DT); // Collect the spills for arguments and other not-materializable values. for (Argument &A : F.args()) @@ -3162,7 +3162,6 @@ void coro::buildCoroutineFrame( if (Checker.isDefinitionAcrossSuspend(A, U)) FrameData.Spills[&A].push_back(cast(U)); - const DominatorTree DT(F); for (Instruction &I : instructions(F)) { // Values returned from coroutine structure intrinsics should not be part // of the Coroutine Frame. 
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index 633fcb3314c42..f86f217bca588 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -879,7 +879,7 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables( // Multiply by 2 to account for padding elements. Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, I * 2)}; - Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr( + Constant *CombinedGlobalElemPtr = ConstantExpr::getInBoundsGetElementPtr( NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs); assert(GV->getType()->getAddressSpace() == 0); GlobalAlias *GAlias = diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 0920179fb76b7..92ad4c34da6e7 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1391,10 +1391,11 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) { return InlineCost::getAlways("preinliner"); } - // For old FDO inliner, we inline the call site as long as cost is not - // "Never". The cost-benefit check is done earlier. + // For old FDO inliner, we inline the call site if it is below hot threshold, + // even if the function is hot based on sample profile data. This is to + // prevent huge functions from being inlined. 
if (!CallsitePrioritizedInline) { - return InlineCost::get(Cost.getCost(), INT_MAX); + return InlineCost::get(Cost.getCost(), SampleHotCallSiteThreshold); } // Otherwise only use the cost from call analyzer, but overwite threshold with diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index e7a188e9431db..9929ebb96dcaf 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -1927,7 +1927,7 @@ void DevirtModule::rebuildGlobal(VTableBits &B) { // element (the original initializer). auto Alias = GlobalAlias::create( B.GV->getInitializer()->getType(), 0, B.GV->getLinkage(), "", - ConstantExpr::getGetElementPtr( + ConstantExpr::getInBoundsGetElementPtr( NewInit->getType(), NewGV, ArrayRef{ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 1)}), diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index c3272d97509f5..89193f8ff94b6 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -4730,6 +4730,21 @@ static Instruction *foldICmpAndXX(ICmpInst &I, const SimplifyQuery &Q, if (Pred == ICmpInst::ICMP_UGE) return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1); + if (ICmpInst::isEquality(Pred) && Op0->hasOneUse()) { + // icmp (X & Y) eq/ne Y --> (X | ~Y) eq/ne -1 if Y is freely invertible and + // Y is non-constant. If Y is constant the `X & C == C` form is preferable + // so don't do this fold. + if (!match(Op1, m_ImmConstant())) + if (auto *NotOp1 = + IC.getFreelyInverted(Op1, !Op1->hasNUsesOrMore(3), &IC.Builder)) + return new ICmpInst(Pred, IC.Builder.CreateOr(A, NotOp1), + Constant::getAllOnesValue(Op1->getType())); + // icmp (X & Y) eq/ne Y --> (~X & Y) eq/ne 0 if X is freely invertible. 
+ if (auto *NotA = IC.getFreelyInverted(A, A->hasOneUse(), &IC.Builder)) + return new ICmpInst(Pred, IC.Builder.CreateAnd(Op1, NotA), + Constant::getNullValue(Op1->getType())); + } + return nullptr; } @@ -5505,21 +5520,6 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) { } } - // canoncalize: - // (icmp eq/ne (and X, C), X) - // -> (icmp eq/ne (and X, ~C), 0) - { - Constant *CMask; - A = nullptr; - if (match(Op0, m_OneUse(m_And(m_Specific(Op1), m_ImmConstant(CMask))))) - A = Op1; - else if (match(Op1, m_OneUse(m_And(m_Specific(Op0), m_ImmConstant(CMask))))) - A = Op0; - if (A) - return new ICmpInst(Pred, Builder.CreateAnd(A, Builder.CreateNot(CMask)), - Constant::getNullValue(A->getType())); - } - if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) { // A == (A^B) -> B == 0 Value *OtherVal = A == Op0 ? B : A; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 0f1979fbe0c76..4f91993750fd2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -1461,13 +1461,24 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { const APInt *MulC; if (match(Op0, m_NUWMul(m_Value(X), m_APInt(MulC)))) { - // Look for a "splat" mul pattern - it replicates bits across each half of - // a value, so a right shift is just a mask of the low bits: - // lshr i[2N] (mul nuw X, (2^N)+1), N --> and iN X, (2^N)-1 - // TODO: Generalize to allow more than just half-width shifts? 
- if (BitWidth > 2 && ShAmtC * 2 == BitWidth && (*MulC - 1).isPowerOf2() && - MulC->logBase2() == ShAmtC) - return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2)); + if (BitWidth > 2 && (*MulC - 1).isPowerOf2() && + MulC->logBase2() == ShAmtC) { + // Look for a "splat" mul pattern - it replicates bits across each half + // of a value, so a right shift is just a mask of the low bits: + // lshr i[2N] (mul nuw X, (2^N)+1), N --> and iN X, (2^N)-1 + if (ShAmtC * 2 == BitWidth) + return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2)); + + // lshr (mul nuw (X, 2^N + 1)), N -> add nuw (X, lshr(X, N)) + if (Op0->hasOneUse()) { + auto *NewAdd = BinaryOperator::CreateNUWAdd( + X, Builder.CreateLShr(X, ConstantInt::get(Ty, ShAmtC), "", + I.isExact())); + NewAdd->setHasNoSignedWrap( + cast(Op0)->hasNoSignedWrap()); + return NewAdd; + } + } // The one-use check is not strictly necessary, but codegen may not be // able to invert the transform and perf may suffer with an extra mul @@ -1487,6 +1498,16 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { } } + // lshr (mul nsw (X, 2^N + 1)), N -> add nsw (X, lshr(X, N)) + if (match(Op0, m_OneUse(m_NSWMul(m_Value(X), m_APInt(MulC))))) { + if (BitWidth > 2 && (*MulC - 1).isPowerOf2() && + MulC->logBase2() == ShAmtC) { + return BinaryOperator::CreateNSWAdd( + X, Builder.CreateLShr(X, ConstantInt::get(Ty, ShAmtC), "", + I.isExact())); + } + } + // Try to narrow bswap. // In the case where the shift amount equals the bitwidth difference, the // shift is eliminated. 
@@ -1690,6 +1711,21 @@ Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) { if (match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y))))) return new SExtInst(Builder.CreateICmpSLT(X, Y), Ty); } + + const APInt *MulC; + if (match(Op0, m_OneUse(m_NSWMul(m_Value(X), m_APInt(MulC)))) && + (BitWidth > 2 && (*MulC - 1).isPowerOf2() && + MulC->logBase2() == ShAmt && + (ShAmt < BitWidth - 1))) /* Minus 1 for the sign bit */ { + + // ashr (mul nsw (X, 2^N + 1)), N -> add nsw (X, ashr(X, N)) + auto *NewAdd = BinaryOperator::CreateNSWAdd( + X, + Builder.CreateAShr(X, ConstantInt::get(Ty, ShAmt), "", I.isExact())); + NewAdd->setHasNoUnsignedWrap( + cast(Op0)->hasNoUnsignedWrap()); + return NewAdd; + } } const SimplifyQuery Q = SQ.getWithInstruction(&I); diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 20d11e0ab55f2..f0b0917a25938 100644 --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -1804,8 +1804,8 @@ Value *DFSanFunction::getRetvalTLS(Type *T, IRBuilder<> &IRB) { Value *DFSanFunction::getRetvalOriginTLS() { return DFS.RetvalOriginTLS; } Value *DFSanFunction::getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB) { - return IRB.CreateConstGEP2_64(DFS.ArgOriginTLSTy, DFS.ArgOriginTLS, 0, ArgNo, - "_dfsarg_o"); + return IRB.CreateConstInBoundsGEP2_64(DFS.ArgOriginTLSTy, DFS.ArgOriginTLS, 0, + ArgNo, "_dfsarg_o"); } Value *DFSanFunction::getOrigin(Value *V) { diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 6aa4188d1cc4d..5eccf7b4adb65 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -2751,7 +2751,7 @@ static bool hoistMulAddAssociation(Instruction &I, Loop &L, IRBuilder<> Builder(Preheader->getTerminator()); for (auto *U : Changes) { assert(L.isLoopInvariant(U->get())); - Instruction *Ins = cast(U->getUser()); + auto 
*Ins = cast(U->getUser()); Value *Mul; if (I.getType()->isIntOrIntVectorTy()) { Mul = Builder.CreateMul(U->get(), Factor, "factor.op.mul"); @@ -2759,8 +2759,20 @@ static bool hoistMulAddAssociation(Instruction &I, Loop &L, Ins->dropPoisonGeneratingFlags(); } else Mul = Builder.CreateFMulFMF(U->get(), Factor, Ins, "factor.op.fmul"); - U->set(Mul); + + // Rewrite the reassociable instruction. + unsigned OpIdx = U->getOperandNo(); + auto *LHS = OpIdx == 0 ? Mul : Ins->getOperand(0); + auto *RHS = OpIdx == 1 ? Mul : Ins->getOperand(1); + auto *NewBO = BinaryOperator::Create(Ins->getOpcode(), LHS, RHS, + Ins->getName() + ".reass", Ins); + NewBO->copyIRFlags(Ins); + if (VariantOp == Ins) + VariantOp = NewBO; + Ins->replaceAllUsesWith(NewBO); + eraseInstruction(*Ins, SafetyInfo, MSSAU); } + I.replaceAllUsesWith(VariantOp); eraseInstruction(I, SafetyInfo, MSSAU); return true; diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index c7e25c9f3d2c9..3fe5478408d45 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -22,8 +22,6 @@ // // Future loop memory idioms to recognize: // memcmp, strlen, etc. -// Future floating point idioms to recognize in -ffast-math mode: -// fpowi // // This could recognize common matrix multiplies and dot product idioms and // replace them with calls to BLAS (if linked in??). @@ -1107,7 +1105,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( GV->setAlignment(Align(16)); Value *PatternPtr = GV; NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes}); - + // Set the TBAA info if present. 
if (AATags.TBAA) NewCall->setMetadata(LLVMContext::MD_tbaa, AATags.TBAA); @@ -1117,7 +1115,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( if (AATags.NoAlias) NewCall->setMetadata(LLVMContext::MD_noalias, AATags.NoAlias); - } + } NewCall->setDebugLoc(TheStore->getDebugLoc()); diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp index d91320863e241..04c54ed69e93f 100644 --- a/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -302,97 +302,6 @@ static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) { return Res; } -/// Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael -/// function. This means that x^(2^k) === 1 mod 2^Bitwidth for -/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic. -/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every -/// even x in Bitwidth-bit arithmetic. -static unsigned CarmichaelShift(unsigned Bitwidth) { - if (Bitwidth < 3) - return Bitwidth - 1; - return Bitwidth - 2; -} - -/// Add the extra weight 'RHS' to the existing weight 'LHS', -/// reducing the combined weight using any special properties of the operation. -/// The existing weight LHS represents the computation X op X op ... op X where -/// X occurs LHS times. The combined weight represents X op X op ... op X with -/// X occurring LHS + RHS times. If op is "Xor" for example then the combined -/// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even; -/// the routine returns 1 in LHS in the first case, and 0 in LHS in the second. -static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) { - // If we were working with infinite precision arithmetic then the combined - // weight would be LHS + RHS. 
But we are using finite precision arithmetic, - // and the APInt sum LHS + RHS may not be correct if it wraps (it is correct - // for nilpotent operations and addition, but not for idempotent operations - // and multiplication), so it is important to correctly reduce the combined - // weight back into range if wrapping would be wrong. - - // If RHS is zero then the weight didn't change. - if (RHS.isMinValue()) - return; - // If LHS is zero then the combined weight is RHS. - if (LHS.isMinValue()) { - LHS = RHS; - return; - } - // From this point on we know that neither LHS nor RHS is zero. - - if (Instruction::isIdempotent(Opcode)) { - // Idempotent means X op X === X, so any non-zero weight is equivalent to a - // weight of 1. Keeping weights at zero or one also means that wrapping is - // not a problem. - assert(LHS == 1 && RHS == 1 && "Weights not reduced!"); - return; // Return a weight of 1. - } - if (Instruction::isNilpotent(Opcode)) { - // Nilpotent means X op X === 0, so reduce weights modulo 2. - assert(LHS == 1 && RHS == 1 && "Weights not reduced!"); - LHS = 0; // 1 + 1 === 0 modulo 2. - return; - } - if (Opcode == Instruction::Add || Opcode == Instruction::FAdd) { - // TODO: Reduce the weight by exploiting nsw/nuw? - LHS += RHS; - return; - } - - assert((Opcode == Instruction::Mul || Opcode == Instruction::FMul) && - "Unknown associative operation!"); - unsigned Bitwidth = LHS.getBitWidth(); - // If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth - // can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth - // bit number x, since either x is odd in which case x^CM = 1, or x is even in - // which case both x^W and x^(W - CM) are zero. By subtracting off multiples - // of CM like this weights can always be reduced to the range [0, CM+Bitwidth) - // which by a happy accident means that they can always be represented using - // Bitwidth bits. - // TODO: Reduce the weight by exploiting nsw/nuw? 
(Could do much better than - // the Carmichael number). - if (Bitwidth > 3) { - /// CM - The value of Carmichael's lambda function. - APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth)); - // Any weight W >= Threshold can be replaced with W - CM. - APInt Threshold = CM + Bitwidth; - assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!"); - // For Bitwidth 4 or more the following sum does not overflow. - LHS += RHS; - while (LHS.uge(Threshold)) - LHS -= CM; - } else { - // To avoid problems with overflow do everything the same as above but using - // a larger type. - unsigned CM = 1U << CarmichaelShift(Bitwidth); - unsigned Threshold = CM + Bitwidth; - assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold && - "Weights not reduced!"); - unsigned Total = LHS.getZExtValue() + RHS.getZExtValue(); - while (Total >= Threshold) - Total -= CM; - LHS = Total; - } -} - using RepeatedValue = std::pair; /// Given an associative binary expression, return the leaf @@ -471,7 +380,7 @@ using RepeatedValue = std::pair; static bool LinearizeExprTree(Instruction *I, SmallVectorImpl &Ops, ReassociatePass::OrderedSet &ToRedo, - bool &HasNUW) { + reassociate::OverflowTracking &Flags) { assert((isa(I) || isa(I)) && "Expected a UnaryOperator or BinaryOperator!"); LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n'); @@ -512,6 +421,7 @@ static bool LinearizeExprTree(Instruction *I, using LeafMap = DenseMap; LeafMap Leaves; // Leaf -> Total weight so far. SmallVector LeafOrder; // Ensure deterministic leaf output order. + const DataLayout DL = I->getModule()->getDataLayout(); #ifndef NDEBUG SmallPtrSet Visited; // For checking the iteration scheme. @@ -520,8 +430,10 @@ static bool LinearizeExprTree(Instruction *I, std::pair P = Worklist.pop_back_val(); I = P.first; // We examine the operands of this binary operator. 
- if (isa(I)) - HasNUW &= I->hasNoUnsignedWrap(); + if (isa(I)) { + Flags.HasNUW &= I->hasNoUnsignedWrap(); + Flags.HasNSW &= I->hasNoSignedWrap(); + } for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands. Value *Op = I->getOperand(OpIdx); @@ -559,26 +471,7 @@ static bool LinearizeExprTree(Instruction *I, "In leaf map but not visited!"); // Update the number of paths to the leaf. - IncorporateWeight(It->second, Weight, Opcode); - -#if 0 // TODO: Re-enable once PR13021 is fixed. - // The leaf already has one use from inside the expression. As we want - // exactly one such use, drop this new use of the leaf. - assert(!Op->hasOneUse() && "Only one use, but we got here twice!"); - I->setOperand(OpIdx, UndefValue::get(I->getType())); - Changed = true; - - // If the leaf is a binary operation of the right kind and we now see - // that its multiple original uses were in fact all by nodes belonging - // to the expression, then no longer consider it to be a leaf and add - // its operands to the expression. - if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) { - LLVM_DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n"); - Worklist.push_back(std::make_pair(BO, It->second)); - Leaves.erase(It); - continue; - } -#endif + It->second += Weight; // If we still have uses that are not accounted for by the expression // then it is not safe to modify the value. @@ -648,6 +541,8 @@ static bool LinearizeExprTree(Instruction *I, // Ensure the leaf is only output once. It->second = 0; Ops.push_back(std::make_pair(V, Weight)); + if (Opcode == Instruction::Add && Flags.AllKnownNonNegative && Flags.HasNSW) + Flags.AllKnownNonNegative &= isKnownNonNegative(V, SimplifyQuery(DL)); } // For nilpotent operations or addition there may be no operands, for example @@ -666,7 +561,7 @@ static bool LinearizeExprTree(Instruction *I, /// linearized and optimized, emit them in-order. 
void ReassociatePass::RewriteExprTree(BinaryOperator *I, SmallVectorImpl &Ops, - bool HasNUW) { + OverflowTracking Flags) { assert(Ops.size() > 1 && "Single values should be used directly!"); // Since our optimizations should never increase the number of operations, the @@ -834,8 +729,12 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I, // Note that it doesn't hold for mul if one of the operands is zero. // TODO: We can preserve NUW flag if we prove that all mul operands // are non-zero. - if (HasNUW && ExpressionChangedStart->getOpcode() == Instruction::Add) - ExpressionChangedStart->setHasNoUnsignedWrap(); + if (ExpressionChangedStart->getOpcode() == Instruction::Add) { + if (Flags.HasNUW) + ExpressionChangedStart->setHasNoUnsignedWrap(); + if (Flags.HasNSW && (Flags.AllKnownNonNegative || Flags.HasNUW)) + ExpressionChangedStart->setHasNoSignedWrap(); + } } } @@ -1192,8 +1091,8 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) { return nullptr; SmallVector Tree; - bool HasNUW = true; - MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts, HasNUW); + OverflowTracking Flags; + MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts, Flags); SmallVector Factors; Factors.reserve(Tree.size()); for (unsigned i = 0, e = Tree.size(); i != e; ++i) { @@ -1235,7 +1134,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) { if (!FoundFactor) { // Make sure to restore the operands to the expression tree. - RewriteExprTree(BO, Factors, HasNUW); + RewriteExprTree(BO, Factors, Flags); return nullptr; } @@ -1247,7 +1146,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) { RedoInsts.insert(BO); V = Factors[0].Op; } else { - RewriteExprTree(BO, Factors, HasNUW); + RewriteExprTree(BO, Factors, Flags); V = BO; } @@ -2373,8 +2272,8 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) { // First, walk the expression tree, linearizing the tree, collecting the // operand information. 
SmallVector Tree; - bool HasNUW = true; - MadeChange |= LinearizeExprTree(I, Tree, RedoInsts, HasNUW); + OverflowTracking Flags; + MadeChange |= LinearizeExprTree(I, Tree, RedoInsts, Flags); SmallVector Ops; Ops.reserve(Tree.size()); for (const RepeatedValue &E : Tree) @@ -2567,7 +2466,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) { dbgs() << '\n'); // Now that we ordered and optimized the expressions, splat them back into // the expression tree, removing any unneeded nodes. - RewriteExprTree(I, Ops, HasNUW); + RewriteExprTree(I, Ops, Flags); } void diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 7b846f2d2d72d..eb471b259c7d4 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -30,11 +30,12 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" -#include "llvm/IR/AttributeMask.h" #include "llvm/IR/Argument.h" +#include "llvm/IR/AttributeMask.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" @@ -1450,6 +1451,8 @@ static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) { Valid.addAttribute(Attribute::NonNull); if (CB.hasRetAttr(Attribute::Alignment)) Valid.addAlignmentAttr(CB.getRetAlign()); + if (std::optional Range = CB.getRange()) + Valid.addRangeAttr(*Range); return Valid; } @@ -1541,6 +1544,14 @@ static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) { if (ValidPG.getAlignment().valueOrOne() < AL.getRetAlignment().valueOrOne()) ValidPG.removeAttribute(Attribute::Alignment); if (ValidPG.hasAttributes()) { + Attribute CBRange = ValidPG.getAttribute(Attribute::Range); + if (CBRange.isValid()) { + Attribute NewRange = AL.getRetAttr(Attribute::Range); + 
if (NewRange.isValid()) { + ValidPG.addRangeAttr( + CBRange.getRange().intersectWith(NewRange.getRange())); + } + } // Three checks. // If the callsite has `noundef`, then a poison due to violating the // return attribute will create UB anyways so we can always propagate. diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 056f0d6b3ee6c..7ecfe5218ef67 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1746,6 +1746,9 @@ static Value *generateNewInstTree(ArrayRef Item, FixedVectorType *Ty, return Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]); if (auto *SI = dyn_cast(I)) return Builder.CreateSelect(Ops[0], Ops[1], Ops[2], "", SI); + if (auto *CI = dyn_cast(I)) + return Builder.CreateCast((Instruction::CastOps)CI->getOpcode(), Ops[0], + DstTy); if (II) return Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops); assert(isa(I) && "Unexpected instruction type in Generate"); @@ -1757,8 +1760,7 @@ static Value *generateNewInstTree(ArrayRef Item, FixedVectorType *Ty, // do so. 
bool VectorCombine::foldShuffleToIdentity(Instruction &I) { auto *Ty = dyn_cast(I.getType()); - if (!Ty || !isa(I.getOperand(0)) || - !isa(I.getOperand(1))) + if (!Ty) return false; SmallVector Start(Ty->getNumElements()); @@ -1847,7 +1849,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { isa(FrontV)) { Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0)); Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1)); - } else if (isa(FrontV)) { + } else if (isa(FrontV)) { Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0)); } else if (isa(FrontV)) { Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0)); diff --git a/llvm/test/Analysis/CostModel/X86/handle-iptr-with-data-layout-to-not-assert.ll b/llvm/test/Analysis/CostModel/X86/handle-iptr-with-data-layout-to-not-assert.ll new file mode 100644 index 0000000000000..d0d414a869636 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/handle-iptr-with-data-layout-to-not-assert.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9] for VF [0-9] For instruction:\s*store ptr %[0-9], ptr %__last" --filter "LV: Found an estimated cost of [0-9] for VF [0-9] For instruction:\s*store ptr %[0-9]" --version 5 +; REQUIRES: asserts +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s +target triple = "x86_64-unknown-linux-gnu" + +define ptr @foo(ptr %__first, ptr %__last) #0 { +; CHECK-LABEL: 'foo' +; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store ptr %0, ptr %__last, align 8 +; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: store ptr %0, ptr %__last, align 8 +; CHECK: LV: Found an estimated cost of 3 for VF 4 For instruction: store ptr %0, ptr %__last, align 8 +; CHECK: LV: Found an estimated cost of 3 for VF 8 For instruction: store ptr %0, ptr %__last, align 8 +; +entry: + %cmp.not1 = icmp eq ptr 
%__first, %__last + br i1 %cmp.not1, label %for.end, label %for.body.preheader + +for.body.preheader: + br label %for.body + +for.body: + %__first.addr.02 = phi ptr [ %incdec.ptr, %for.body ], [ %__first, %for.body.preheader ] + %0 = load ptr, ptr %__first.addr.02, align 8 + store ptr %0, ptr %__last, align 8 + %incdec.ptr = getelementptr inbounds i8, ptr %__first.addr.02, i64 16 + %cmp.not = icmp eq ptr %incdec.ptr, %__last + br i1 %cmp.not, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret ptr null +} + +attributes #0 = { "target-cpu"="znver4" } diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll index 809b15b200495..81d8b01fe7fb7 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll @@ -130,8 +130,16 @@ define void @neg_dist_dep_type_size_equivalence(ptr nocapture %vec, i64 %n) { ; CHECK-LABEL: 'neg_dist_dep_type_size_equivalence' ; CHECK-NEXT: loop: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding. +; CHECK-NEXT: Unknown data dependence. 
; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 -> +; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8 +; CHECK-EMPTY: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %ld.i64 = load i64, ptr %gep.iv, align 8 -> +; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8 +; CHECK-EMPTY: ; CHECK-NEXT: BackwardVectorizableButPreventsForwarding: ; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 -> ; CHECK-NEXT: store double %val, ptr %gep.iv.101.i64, align 8 diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll index 845ff078ee0eb..416742a94e0d3 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll @@ -45,8 +45,13 @@ exit: define void @different_non_constant_strides_known_backward_distance_larger_than_trip_count(ptr %A) { ; CHECK-LABEL: 'different_non_constant_strides_known_backward_distance_larger_than_trip_count' ; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unknown data dependence. 
; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %l = load i32, ptr %gep, align 4 -> +; CHECK-NEXT: store i32 %add, ptr %gep.mul.2, align 4 +; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: ; CHECK-EMPTY: diff --git a/llvm/test/Analysis/ScalarEvolution/predicated-symbolic-max-backedge-taken-count.ll b/llvm/test/Analysis/ScalarEvolution/predicated-symbolic-max-backedge-taken-count.ll new file mode 100644 index 0000000000000..8dc79a54eb97a --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/predicated-symbolic-max-backedge-taken-count.ll @@ -0,0 +1,83 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='print' -scalar-evolution-classify-expressions=0 -disable-output %s 2>&1 | FileCheck %s + +; %i and %i + 1 can overflow. +define void @test1(i64 %x, ptr %a, ptr %b) { +; CHECK-LABEL: 'test1' +; CHECK-NEXT: Determining loop execution counts for: @test1 +; CHECK-NEXT: Loop %header: Unpredictable backedge-taken count. +; CHECK-NEXT: exit count for header: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: exit count for latch: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: Loop %header: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %header: Unpredictable symbolic max backedge-taken count. 
+; CHECK-NEXT: symbolic max exit count for header: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: symbolic max exit count for latch: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: Loop %header: Predicated symbolic max backedge-taken count is (-1 + (1 umax %x)) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {1,+,1}<%header> Added Flags: +; +entry: + br label %header + +header: + %conv11 = phi i64 [ 0, %entry ], [ %conv, %latch ] + %i.010 = phi i32 [ 0, %entry ], [ %add, %latch ] + %add = add i32 %i.010, 1 + %idxprom = zext i32 %add to i64 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom + %ld = load i32, ptr %arrayidx, align 4 + %uncountable.c = icmp eq i32 %ld, 10 + br i1 %uncountable.c, label %exit, label %latch + +latch: + %add2 = add nsw i32 %ld, 1 + %arrayidx4 = getelementptr inbounds i32, ptr %b, i64 %conv11 + store i32 %add2, ptr %arrayidx4, align 4 + %conv = zext i32 %add to i64 + %cmp = icmp ult i64 %conv, %x + br i1 %cmp, label %header, label %exit + +exit: + ret void +} + +; %i can overflow. +; +; We need to check that i doesn't wrap, but we don't need a run-time alias +; check. We also need an extra no-wrap check to get the backedge taken count. +define void @test2(i64 %x, ptr %a) { +; CHECK-LABEL: 'test2' +; CHECK-NEXT: Determining loop execution counts for: @test2 +; CHECK-NEXT: Loop %header: Unpredictable backedge-taken count. +; CHECK-NEXT: exit count for header: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: exit count for latch: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: Loop %header: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %header: Unpredictable symbolic max backedge-taken count. 
+; CHECK-NEXT: symbolic max exit count for header: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: symbolic max exit count for latch: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: Loop %header: Predicated symbolic max backedge-taken count is (-1 + (1 umax %x)) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {1,+,1}<%header> Added Flags: +; +entry: + br label %header + +header: + %conv11 = phi i64 [ 0, %entry ], [ %conv, %latch ] + %i.010 = phi i32 [ 0, %entry ], [ %inc, %latch ] + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %conv11 + %ld = load i32, ptr %arrayidx, align 4 + %uncountable.c = icmp eq i32 %ld, 10 + br i1 %uncountable.c, label %exit, label %latch + +latch: + %add = add nsw i32 %ld, 1 + store i32 %add, ptr %arrayidx, align 4 + %inc = add i32 %i.010, 1 + %conv = zext i32 %inc to i64 + %cmp = icmp ult i64 %conv, %x + br i1 %cmp, label %header, label %exit + +exit: + ret void +} diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers.ll index 335026dc9b62b..efad77b684a75 100644 --- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers.ll @@ -90,7 +90,7 @@ S: br i1 %cond.uni, label %exit, label %T T: -; CHECK-NIT: DIVERGENT: %tt.phi = phi i32 +; CHECK-NOT: DIVERGENT: %tt.phi = phi i32 %tt.phi = phi i32 [ %ss, %S ], [ %a, %entry ] %tt = add i32 %b, 1 br label %P diff --git a/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll b/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll index 47c4587f6991b..ba3a484441e9e 100644 --- a/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll +++ b/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll @@ -428,9 +428,9 @@ define i8 @known_power_of_two_lshr_add_one_allow_zero(i8 %x, i8 %y) { define i1 @known_power_of_two_lshr_add_one_nuw_deny_zero(i8 %x, i8 %y) { ; CHECK-LABEL: 
@known_power_of_two_lshr_add_one_nuw_deny_zero( ; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 -1, [[X:%.*]] -; CHECK-NEXT: [[P:%.*]] = add nuw i8 [[TMP1]], 1 -; CHECK-NEXT: [[AND:%.*]] = and i8 [[P]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AND]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = sub i8 -2, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[TMP3]], -1 ; CHECK-NEXT: ret i1 [[R]] ; %4 = lshr i8 -1, %x @@ -445,9 +445,9 @@ define i1 @known_power_of_two_lshr_add_one_nuw_deny_zero(i8 %x, i8 %y) { define i1 @negative_known_power_of_two_lshr_add_one_deny_zero(i8 %x, i8 %y) { ; CHECK-LABEL: @negative_known_power_of_two_lshr_add_one_deny_zero( ; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 -1, [[X:%.*]] -; CHECK-NEXT: [[P:%.*]] = add i8 [[TMP1]], 1 -; CHECK-NEXT: [[AND:%.*]] = and i8 [[P]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[AND]], [[P]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i8 -2, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[TMP3]], -1 ; CHECK-NEXT: ret i1 [[R]] ; %4 = lshr i8 -1, %x @@ -462,9 +462,9 @@ define i1 @negative_known_power_of_two_lshr_add_one_deny_zero(i8 %x, i8 %y) { define i1 @negative_known_power_of_two_lshr_add_one_nsw_deny_zero(i8 %x, i8 %y) { ; CHECK-LABEL: @negative_known_power_of_two_lshr_add_one_nsw_deny_zero( ; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 -1, [[X:%.*]] -; CHECK-NEXT: [[P:%.*]] = add nsw i8 [[TMP1]], 1 -; CHECK-NEXT: [[AND:%.*]] = and i8 [[P]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[AND]], [[P]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i8 -2, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[TMP3]], -1 ; CHECK-NEXT: ret i1 [[R]] ; %4 = lshr i8 -1, %x diff --git a/llvm/test/Assembler/invalid-ptrauth-const1.ll b/llvm/test/Assembler/invalid-ptrauth-const1.ll new file mode 100644 index 0000000000000..fba2e23078238 --- /dev/null +++ b/llvm/test/Assembler/invalid-ptrauth-const1.ll 
@@ -0,0 +1,6 @@ +; RUN: not llvm-as < %s 2>&1 | FileCheck %s + +@var = global i32 0 + +; CHECK: error: constant ptrauth base pointer must be a pointer +@auth_var = global ptr ptrauth (i32 42, i32 0) diff --git a/llvm/test/Assembler/invalid-ptrauth-const2.ll b/llvm/test/Assembler/invalid-ptrauth-const2.ll new file mode 100644 index 0000000000000..4499c42601c99 --- /dev/null +++ b/llvm/test/Assembler/invalid-ptrauth-const2.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as < %s 2>&1 | FileCheck %s + +@var = global i32 0 + +; CHECK: error: constant ptrauth key must be i32 constant +@auth_var = global ptr ptrauth (ptr @var, i32 ptrtoint (ptr @var to i32)) diff --git a/llvm/test/Assembler/invalid-ptrauth-const3.ll b/llvm/test/Assembler/invalid-ptrauth-const3.ll new file mode 100644 index 0000000000000..3f2688d92a001 --- /dev/null +++ b/llvm/test/Assembler/invalid-ptrauth-const3.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as < %s 2>&1 | FileCheck %s + +@var = global i32 0 + +; CHECK: error: constant ptrauth address discriminator must be a pointer +@auth_var = global ptr ptrauth (ptr @var, i32 2, i64 65535, i8 0) diff --git a/llvm/test/Assembler/invalid-ptrauth-const4.ll b/llvm/test/Assembler/invalid-ptrauth-const4.ll new file mode 100644 index 0000000000000..843a220458a61 --- /dev/null +++ b/llvm/test/Assembler/invalid-ptrauth-const4.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as < %s 2>&1 | FileCheck %s + +@var = global i32 0 + +; CHECK: error: constant ptrauth integer discriminator must be i64 constant +@auth_var = global ptr ptrauth (ptr @var, i32 2, ptr null, i64 ptrtoint (ptr @var to i64)) diff --git a/llvm/test/Assembler/invalid-ptrauth-const5.ll b/llvm/test/Assembler/invalid-ptrauth-const5.ll new file mode 100644 index 0000000000000..9b47f6f5f423f --- /dev/null +++ b/llvm/test/Assembler/invalid-ptrauth-const5.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as < %s 2>&1 | FileCheck %s + +@var = global i32 0 + +; CHECK: error: constant ptrauth integer discriminator must be i64 constant +@auth_var = global 
ptr ptrauth (ptr @var, i32 2, ptr @var)) diff --git a/llvm/test/Assembler/non-global-value-max-name-size-2.ll b/llvm/test/Assembler/non-global-value-max-name-size-2.ll new file mode 100644 index 0000000000000..5eac003ddb438 --- /dev/null +++ b/llvm/test/Assembler/non-global-value-max-name-size-2.ll @@ -0,0 +1,23 @@ +; RUN: opt < %s -S -passes='always-inline' -non-global-value-max-name-size=5 | opt -non-global-value-max-name-size=5 -passes=verify -disable-output + +; Opt should not generate too long name for labels during inlining. + +define internal i32 @inner(i32 %flag) alwaysinline { +entry: + %icmp = icmp slt i32 %flag, 0 + br i1 %icmp, label %one, label %two + +one: + ret i32 42 + +two: + ret i32 44 +} + +define i32 @outer(i32 %x) { +entry: + %call1 = call i32 @inner(i32 %x) + %call2 = call i32 @inner(i32 %x) + %ret = add i32 %call1, %call2 + ret i32 %ret +} \ No newline at end of file diff --git a/llvm/test/Assembler/ptrauth-const.ll b/llvm/test/Assembler/ptrauth-const.ll new file mode 100644 index 0000000000000..94d35146d5927 --- /dev/null +++ b/llvm/test/Assembler/ptrauth-const.ll @@ -0,0 +1,24 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +@var = global i32 0 + +; CHECK: @basic = global ptr ptrauth (ptr @var, i32 0) +@basic = global ptr ptrauth (ptr @var, i32 0) + +; CHECK: @keyed = global ptr ptrauth (ptr @var, i32 3) +@keyed = global ptr ptrauth (ptr @var, i32 3) + +; CHECK: @intdisc = global ptr ptrauth (ptr @var, i32 0, i64 -1) +@intdisc = global ptr ptrauth (ptr @var, i32 0, i64 -1) + +; CHECK: @addrdisc = global ptr ptrauth (ptr @var, i32 2, i64 1234, ptr @addrdisc) +@addrdisc = global ptr ptrauth (ptr @var, i32 2, i64 1234, ptr @addrdisc) + + +@var1 = addrspace(1) global i32 0 + +; CHECK: @addrspace = global ptr addrspace(1) ptrauth (ptr addrspace(1) @var1, i32 0) +@addrspace = global ptr addrspace(1) ptrauth (ptr addrspace(1) @var1, i32 0) + +; CHECK: @addrspace_addrdisc = addrspace(2) global ptr addrspace(1) ptrauth (ptr addrspace(1) @var1, 
i32 2, i64 1234, ptr addrspace(2) @addrspace_addrdisc) +@addrspace_addrdisc = addrspace(2) global ptr addrspace(1) ptrauth (ptr addrspace(1) @var1, i32 2, i64 1234, ptr addrspace(2) @addrspace_addrdisc) diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index b374924516d66..2a846e036924c 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -217,6 +217,10 @@ declare void @g.f1() ; CHECK: @g.sanitize_address_dyninit = global i32 0, sanitize_address_dyninit ; CHECK: @g.sanitize_multiple = global i32 0, sanitize_memtag, sanitize_address_dyninit +; ptrauth constant +@auth_var = global ptr ptrauth (ptr @g1, i32 0, i64 65535, ptr null) +; CHECK: @auth_var = global ptr ptrauth (ptr @g1, i32 0, i64 65535) + ;; Aliases ; Format: @ = [Linkage] [Visibility] [DLLStorageClass] [ThreadLocal] ; [unnamed_addr] alias @ diff --git a/llvm/test/Bitcode/value-with-long-name-dbg.ll b/llvm/test/Bitcode/value-with-long-name-dbg.ll new file mode 100644 index 0000000000000..0cc3569d8617b --- /dev/null +++ b/llvm/test/Bitcode/value-with-long-name-dbg.ll @@ -0,0 +1,11 @@ +; REQUIRES: asserts +; Force the size to be small to check assertion message. +; RUN: not --crash opt -S %s -O2 -o - -non-global-value-max-name-size=0 2>&1 | FileCheck %s +; CHECK: Can't generate unique name: MaxNameSize is too small. 
+ +define i32 @f(i32 %a, i32 %b) { + %c = add i32 %a, %b + %d = add i32 %c, %a + %e = add i32 %d, %b + ret i32 %e +} diff --git a/llvm/test/Bitcode/value-with-long-name.ll b/llvm/test/Bitcode/value-with-long-name.ll index 1ca5d133e09ae..aa7da5f5b7dba 100644 --- a/llvm/test/Bitcode/value-with-long-name.ll +++ b/llvm/test/Bitcode/value-with-long-name.ll @@ -1,10 +1,10 @@ ; Check the size of generated variable when no option is set ; RUN: opt -S %s -O2 -o - | FileCheck -check-prefix=CHECK-LONG %s +; RUN: opt -S %s -O2 -o - -non-global-value-max-name-size=-1 | FileCheck -check-prefix=CHECK-LONG %s ; CHECK-LONG: %{{[a-z]{4}[a-z]+}} ; Then check we correctly cap the size of newly generated non-global values name ; Force the size to be small so that the check works on release and debug build -; RUN: opt -S %s -O2 -o - -non-global-value-max-name-size=0 | FileCheck -check-prefix=CHECK-SHORT %s ; RUN: opt -S %s -O2 -o - -non-global-value-max-name-size=1 | FileCheck -check-prefix=CHECK-SHORT %s ; CHECK-SHORT-NOT: %{{[a-z][a-z]+}} @@ -14,5 +14,3 @@ define i32 @f(i32 %a, i32 %b) { %e = add i32 %d, %b ret i32 %e } - - diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt index c942339e43608..2f466c258f677 100644 --- a/llvm/test/CMakeLists.txt +++ b/llvm/test/CMakeLists.txt @@ -26,6 +26,7 @@ llvm_canonicalize_cmake_booleans( LLVM_TOOL_LLVM_DRIVER_BUILD LLVM_INCLUDE_SPIRV_TOOLS_TESTS LLVM_APPEND_VC_REV + LLVM_HAS_LOGF128 ) configure_lit_site_cfg( diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-lse2.ll index e7e231bc344d9..3732d4feb0c67 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-lse2.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-lse2.ll @@ -566,6 +566,119 @@ define dso_local i128 @load_atomic_i128_unaligned_seq_cst_const(ptr readonly %pt %r = load atomic i128, ptr %ptr seq_cst, align 1 ret i128 %r } + +define dso_local half 
@load_atomic_f16_aligned_unordered(ptr %ptr) { +; CHECK-LABEL: load_atomic_f16_aligned_unordered: +; CHECK: ldrh w8, [x0] + %r = load atomic half, ptr %ptr unordered, align 2 + ret half %r +} + +define dso_local half @load_atomic_f16_aligned_unordered_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_f16_aligned_unordered_const: +; CHECK: ldrh w8, [x0] + %r = load atomic half, ptr %ptr unordered, align 2 + ret half %r +} + +define dso_local half @load_atomic_f16_aligned_monotonic(ptr %ptr) { +; CHECK-LABEL: load_atomic_f16_aligned_monotonic: +; CHECK: ldrh w8, [x0] + %r = load atomic half, ptr %ptr monotonic, align 2 + ret half %r +} + +define dso_local half @load_atomic_f16_aligned_monotonic_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_f16_aligned_monotonic_const: +; CHECK: ldrh w8, [x0] + %r = load atomic half, ptr %ptr monotonic, align 2 + ret half %r +} + +define dso_local half @load_atomic_f16_aligned_acquire(ptr %ptr) { +; CHECK-LABEL: load_atomic_f16_aligned_acquire: +; CHECK: ldarh w8, [x0] + %r = load atomic half, ptr %ptr acquire, align 2 + ret half %r +} + +define dso_local half @load_atomic_f16_aligned_acquire_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_f16_aligned_acquire_const: +; CHECK: ldarh w8, [x0] + %r = load atomic half, ptr %ptr acquire, align 2 + ret half %r +} + +define dso_local half @load_atomic_f16_aligned_seq_cst(ptr %ptr) { +; CHECK-LABEL: load_atomic_f16_aligned_seq_cst: +; CHECK: ldarh w8, [x0] + %r = load atomic half, ptr %ptr seq_cst, align 2 + ret half %r +} + +define dso_local half @load_atomic_f16_aligned_seq_cst_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_f16_aligned_seq_cst_const: +; CHECK: ldarh w8, [x0] + %r = load atomic half, ptr %ptr seq_cst, align 2 + ret half %r +} + +define dso_local bfloat @load_atomic_bf16_aligned_unordered(ptr %ptr) { +; CHECK-LABEL: load_atomic_bf16_aligned_unordered: +; CHECK: ldrh w8, [x0] + %r = load atomic bfloat, ptr %ptr unordered, align 2 + ret bfloat %r +} 
+ +define dso_local bfloat @load_atomic_bf16_aligned_unordered_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_bf16_aligned_unordered_const: +; CHECK: ldrh w8, [x0] + %r = load atomic bfloat, ptr %ptr unordered, align 2 + ret bfloat %r +} + +define dso_local bfloat @load_atomic_bf16_aligned_monotonic(ptr %ptr) { +; CHECK-LABEL: load_atomic_bf16_aligned_monotonic: +; CHECK: ldrh w8, [x0] + %r = load atomic bfloat, ptr %ptr monotonic, align 2 + ret bfloat %r +} + +define dso_local bfloat @load_atomic_bf16_aligned_monotonic_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_bf16_aligned_monotonic_const: +; CHECK: ldrh w8, [x0] + %r = load atomic bfloat, ptr %ptr monotonic, align 2 + ret bfloat %r +} + +define dso_local bfloat @load_atomic_bf16_aligned_acquire(ptr %ptr) { +; CHECK-LABEL: load_atomic_bf16_aligned_acquire: +; CHECK: ldarh w8, [x0] + %r = load atomic bfloat, ptr %ptr acquire, align 2 + ret bfloat %r +} + +define dso_local bfloat @load_atomic_bf16_aligned_acquire_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_bf16_aligned_acquire_const: +; CHECK: ldarh w8, [x0] + %r = load atomic bfloat, ptr %ptr acquire, align 2 + ret bfloat %r +} + +define dso_local bfloat @load_atomic_bf16_aligned_seq_cst(ptr %ptr) { +; CHECK-LABEL: load_atomic_bf16_aligned_seq_cst: +; CHECK: ldarh w8, [x0] + %r = load atomic bfloat, ptr %ptr seq_cst, align 2 + ret bfloat %r +} + +define dso_local bfloat @load_atomic_bf16_aligned_seq_cst_const(ptr readonly %ptr) { +; CHECK-LABEL: load_atomic_bf16_aligned_seq_cst_const: +; CHECK: ldarh w8, [x0] + %r = load atomic bfloat, ptr %ptr seq_cst, align 2 + ret bfloat %r +} + ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; -O0: {{.*}} ; -O1: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-add.mir index fad3655da9d01..a0142afd06777 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-add.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-add.mir @@ -207,3 +207,126 @@ body: | %3:_(<4 x s32>) = G_FADD %0, %2(<4 x s32>) $q0 = COPY %3(<4 x s32>) ... +--- +name: saddl_v8i8_v8i32 +tracksRegLiveness: true +body: | + bb.1: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: saddl_v8i8_v8i32 + ; CHECK: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<8 x s16>) = G_SEXT [[COPY]](<8 x s8>) + ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(<8 x s16>) = G_SEXT [[COPY1]](<8 x s8>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[SEXT]], [[SEXT1]] + ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(<8 x s32>) = G_SEXT [[ADD]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[SEXT2]](<8 x s32>) + ; CHECK-NEXT: $q0 = COPY [[UV]](<4 x s32>) + ; CHECK-NEXT: $q1 = COPY [[UV1]](<4 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1 + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s32>) = G_SEXT %0(<8 x s8>) + %3:_(<8 x s32>) = G_SEXT %1(<8 x s8>) + %4:_(<8 x s32>) = G_ADD %2, %3 + %5:_(<4 x s32>), %6:_(<4 x s32>) = G_UNMERGE_VALUES %4(<8 x s32>) + $q0 = COPY %5(<4 x s32>) + $q1 = COPY %6(<4 x s32>) + RET_ReallyLR implicit $q0, implicit $q1 +... 
+ +--- +name: uaddl_v8i8_v8i32 +tracksRegLiveness: true +body: | + bb.1: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: uaddl_v8i8_v8i32 + ; CHECK: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(<8 x s16>) = G_ZEXT [[COPY]](<8 x s8>) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(<8 x s16>) = G_ZEXT [[COPY1]](<8 x s8>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[ZEXT]], [[ZEXT1]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(<8 x s32>) = G_ZEXT [[ADD]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[ZEXT2]](<8 x s32>) + ; CHECK-NEXT: $q0 = COPY [[UV]](<4 x s32>) + ; CHECK-NEXT: $q1 = COPY [[UV1]](<4 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1 + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s32>) = G_ZEXT %0(<8 x s8>) + %3:_(<8 x s32>) = G_ZEXT %1(<8 x s8>) + %4:_(<8 x s32>) = G_ADD %2, %3 + %5:_(<4 x s32>), %6:_(<4 x s32>) = G_UNMERGE_VALUES %4(<8 x s32>) + $q0 = COPY %5(<4 x s32>) + $q1 = COPY %6(<4 x s32>) + RET_ReallyLR implicit $q0, implicit $q1 +... 
+ +--- +name: ssubl_v8i8_v8i32 +tracksRegLiveness: true +body: | + bb.1: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: ssubl_v8i8_v8i32 + ; CHECK: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<8 x s16>) = G_SEXT [[COPY]](<8 x s8>) + ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(<8 x s16>) = G_SEXT [[COPY1]](<8 x s8>) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[SEXT]], [[SEXT1]] + ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(<8 x s32>) = G_SEXT [[SUB]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[SEXT2]](<8 x s32>) + ; CHECK-NEXT: $q0 = COPY [[UV]](<4 x s32>) + ; CHECK-NEXT: $q1 = COPY [[UV1]](<4 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1 + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s32>) = G_SEXT %0(<8 x s8>) + %3:_(<8 x s32>) = G_SEXT %1(<8 x s8>) + %4:_(<8 x s32>) = G_SUB %2, %3 + %5:_(<4 x s32>), %6:_(<4 x s32>) = G_UNMERGE_VALUES %4(<8 x s32>) + $q0 = COPY %5(<4 x s32>) + $q1 = COPY %6(<4 x s32>) + RET_ReallyLR implicit $q0, implicit $q1 +... 
+ +--- +name: usubl_v8i8_v8i32 +tracksRegLiveness: true +body: | + bb.1: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: usubl_v8i8_v8i32 + ; CHECK: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(<8 x s16>) = G_ZEXT [[COPY]](<8 x s8>) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(<8 x s16>) = G_ZEXT [[COPY1]](<8 x s8>) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[ZEXT]], [[ZEXT1]] + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<8 x s32>) = G_SEXT [[SUB]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[SEXT]](<8 x s32>) + ; CHECK-NEXT: $q0 = COPY [[UV]](<4 x s32>) + ; CHECK-NEXT: $q1 = COPY [[UV1]](<4 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1 + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s32>) = G_ZEXT %0(<8 x s8>) + %3:_(<8 x s32>) = G_ZEXT %1(<8 x s8>) + %4:_(<8 x s32>) = G_SUB %2, %3 + %5:_(<4 x s32>), %6:_(<4 x s32>) = G_UNMERGE_VALUES %4(<8 x s32>) + $q0 = COPY %5(<4 x s32>) + $q1 = COPY %6(<4 x s32>) + RET_ReallyLR implicit $q0, implicit $q1 +... 
diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll index 94b792b887eb4..def4192b0e005 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll @@ -94,18 +94,19 @@ define i32 @oversized_ADDV_256(ptr noalias nocapture readonly %arg1, ptr noalias ; ; GISEL-LABEL: oversized_ADDV_256: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: ldr d0, [x0] -; GISEL-NEXT: ldr d1, [x1] -; GISEL-NEXT: ushll v0.8h, v0.8b, #0 -; GISEL-NEXT: ushll v1.8h, v1.8b, #0 -; GISEL-NEXT: usubl v2.4s, v0.4h, v1.4h -; GISEL-NEXT: usubl2 v0.4s, v0.8h, v1.8h -; GISEL-NEXT: cmlt v1.4s, v2.4s, #0 -; GISEL-NEXT: cmlt v3.4s, v0.4s, #0 -; GISEL-NEXT: neg v4.4s, v2.4s -; GISEL-NEXT: neg v5.4s, v0.4s -; GISEL-NEXT: bsl v1.16b, v4.16b, v2.16b -; GISEL-NEXT: bit v0.16b, v5.16b, v3.16b +; GISEL-NEXT: ldr d1, [x0] +; GISEL-NEXT: ldr d2, [x1] +; GISEL-NEXT: movi v0.2d, #0000000000000000 +; GISEL-NEXT: usubl v1.8h, v1.8b, v2.8b +; GISEL-NEXT: sshll v2.4s, v1.4h, #0 +; GISEL-NEXT: sshll2 v3.4s, v1.8h, #0 +; GISEL-NEXT: ssubw2 v0.4s, v0.4s, v1.8h +; GISEL-NEXT: cmlt v4.4s, v2.4s, #0 +; GISEL-NEXT: cmlt v5.4s, v3.4s, #0 +; GISEL-NEXT: neg v6.4s, v2.4s +; GISEL-NEXT: mov v1.16b, v4.16b +; GISEL-NEXT: bif v0.16b, v3.16b, v5.16b +; GISEL-NEXT: bsl v1.16b, v6.16b, v2.16b ; GISEL-NEXT: add v0.4s, v1.4s, v0.4s ; GISEL-NEXT: addv s0, v0.4s ; GISEL-NEXT: fmov w0, s0 diff --git a/llvm/test/CodeGen/AArch64/arm64-tbl.ll b/llvm/test/CodeGen/AArch64/arm64-tbl.ll index b89232c03f136..44b92e6ccd088 100644 --- a/llvm/test/CodeGen/AArch64/arm64-tbl.ll +++ b/llvm/test/CodeGen/AArch64/arm64-tbl.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple 
-global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind { ; CHECK-LABEL: tbl1_8b: @@ -20,175 +21,378 @@ define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind { } define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) { -; CHECK-LABEL: tbl2_8b: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl.8b v0, { v0, v1 }, v2 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbl2_8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v2 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbl2_8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v2 +; CHECK-GI-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) ret <8 x i8> %tmp3 } define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { -; CHECK-LABEL: tbl2_16b: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbl2_16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbl2_16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 +; 
CHECK-GI-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) ret <16 x i8> %tmp3 } define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { -; CHECK-LABEL: tbl3_8b: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: tbl.8b v0, { v0, v1, v2 }, v3 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbl3_8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2 }, v3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbl3_8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2 }, v3 +; CHECK-GI-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) ret <8 x i8> %tmp3 } define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { -; CHECK-LABEL: tbl3_16b: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: tbl.16b v0, { v0, v1, v2 }, v3 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbl3_16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def 
$q0_q1_q2 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2 }, v3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbl3_16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2 }, v3 +; CHECK-GI-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) ret <16 x i8> %tmp3 } define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { -; CHECK-LABEL: tbl4_8b: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbl4_8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbl4_8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed 
$q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4 +; CHECK-GI-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) ret <8 x i8> %tmp3 } define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { -; CHECK-LABEL: tbl4_16b: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbl4_16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbl4_16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 +; CHECK-GI-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) ret <16 x i8> %tmp3 } -; CHECK-LABEL: .LCPI8_0: -; CHECK-NEXT: .byte 0 // 0x0 -; CHECK-NEXT: .byte 4 // 0x4 -; CHECK-NEXT: .byte 8 // 0x8 -; CHECK-NEXT: .byte 12 // 0xc -; 
CHECK-NEXT: .byte 255 // 0xff -; CHECK-NEXT: .byte 255 // 0xff -; CHECK-NEXT: .byte 255 // 0xff -; CHECK-NEXT: .byte 255 // 0xff +; CHECK-SD-LABEL: .LCPI8_0: +; CHECK-SD: .byte 0 // 0x0 +; CHECK-SD-NEXT: .byte 4 // 0x4 +; CHECK-SD-NEXT: .byte 8 // 0x8 +; CHECK-SD-NEXT: .byte 12 // 0xc +; CHECK-SD-NEXT: .byte 255 // 0xff +; CHECK-SD-NEXT: .byte 255 // 0xff +; CHECK-SD-NEXT: .byte 255 // 0xff +; CHECK-SD-NEXT: .byte 255 // 0xff + +; CHECK-GI-LABEL: .LCPI8_0: +; CHECK-GI: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 1 // 0x1 +; CHECK-GI-NEXT: .byte 2 // 0x2 +; CHECK-GI-NEXT: .byte 3 // 0x3 +; CHECK-GI-NEXT: .byte 12 // 0xc +; CHECK-GI-NEXT: .byte 13 // 0xd +; CHECK-GI-NEXT: .byte 14 // 0xe +; CHECK-GI-NEXT: .byte 15 // 0xf +; CHECK-GI-LABEL: .LCPI8_1: +; CHECK-GI: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 4 // 0x4 +; CHECK-GI-NEXT: .byte 8 // 0x8 +; CHECK-GI-NEXT: .byte 12 // 0xc +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { -; CHECK-LABEL: shuffled_tbl2_to_tbl4_v8i8: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI8_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 -; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI8_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 -; CHECK-NEXT: tbl.8b v0, { v0, v1 }, v4 -; CHECK-NEXT: tbl.8b v1, { v2, v3 }, v4 -; CHECK-NEXT: mov.s v0[1], v1[1] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_v8i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI8_0 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 +; CHECK-SD-NEXT: ldr 
d4, [x8, :lo12:.LCPI8_0] +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 +; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v4 +; CHECK-SD-NEXT: tbl.8b v1, { v2, v3 }, v4 +; CHECK-SD-NEXT: mov.s v0[1], v1[1] +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_v8i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI8_1 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: ldr d4, [x8, :lo12:.LCPI8_1] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: adrp x8, .LCPI8_0 +; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v4 +; CHECK-GI-NEXT: tbl.8b v1, { v2, v3 }, v4 +; CHECK-GI-NEXT: mov.d v0[1], v1[0] +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI8_0] +; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> ) %t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> ) %s = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> ret <8 x i8> %s } -; CHECK-LABEL: .LCPI9_0: -; CHECK-NEXT: .byte 0 // 0x0 -; CHECK-NEXT: .byte 4 // 0x4 -; CHECK-NEXT: .byte 8 // 0x8 -; CHECK-NEXT: .byte 12 // 0xc -; CHECK-NEXT: .byte 16 // 0x10 -; CHECK-NEXT: .byte 20 // 0x14 -; CHECK-NEXT: .byte 24 // 0x18 -; CHECK-NEXT: .byte 28 // 0x1c -; CHECK-NEXT: .byte 32 // 0x20 -; CHECK-NEXT: .byte 36 // 0x24 -; CHECK-NEXT: .byte 40 // 0x28 -; CHECK-NEXT: .byte 44 // 0x2c -; CHECK-NEXT: .byte 48 // 0x30 -; CHECK-NEXT: .byte 52 // 0x34 -; CHECK-NEXT: .byte 56 // 0x38 -; CHECK-NEXT: .byte 60 // 0x3c +; CHECK-SD-LABEL: .LCPI9_0: +; CHECK-SD-NEXT: .byte 0 // 0x0 +; 
CHECK-SD-NEXT: .byte 4 // 0x4 +; CHECK-SD-NEXT: .byte 8 // 0x8 +; CHECK-SD-NEXT: .byte 12 // 0xc +; CHECK-SD-NEXT: .byte 16 // 0x10 +; CHECK-SD-NEXT: .byte 20 // 0x14 +; CHECK-SD-NEXT: .byte 24 // 0x18 +; CHECK-SD-NEXT: .byte 28 // 0x1c +; CHECK-SD-NEXT: .byte 32 // 0x20 +; CHECK-SD-NEXT: .byte 36 // 0x24 +; CHECK-SD-NEXT: .byte 40 // 0x28 +; CHECK-SD-NEXT: .byte 44 // 0x2c +; CHECK-SD-NEXT: .byte 48 // 0x30 +; CHECK-SD-NEXT: .byte 52 // 0x34 +; CHECK-SD-NEXT: .byte 56 // 0x38 +; CHECK-SD-NEXT: .byte 60 // 0x3c + +;CHECK-GI-LABEL: .LCPI9_0: +;CHECK-GI: .byte 0 // 0x0 +;CHECK-GI-NEXT: .byte 1 // 0x1 +;CHECK-GI-NEXT: .byte 2 // 0x2 +;CHECK-GI-NEXT: .byte 3 // 0x3 +;CHECK-GI-NEXT: .byte 4 // 0x4 +;CHECK-GI-NEXT: .byte 5 // 0x5 +;CHECK-GI-NEXT: .byte 6 // 0x6 +;CHECK-GI-NEXT: .byte 7 // 0x7 +;CHECK-GI-NEXT: .byte 16 // 0x10 +;CHECK-GI-NEXT: .byte 17 // 0x11 +;CHECK-GI-NEXT: .byte 18 // 0x12 +;CHECK-GI-NEXT: .byte 19 // 0x13 +;CHECK-GI-NEXT: .byte 20 // 0x14 +;CHECK-GI-NEXT: .byte 21 // 0x15 +;CHECK-GI-NEXT: .byte 22 // 0x16 +;CHECK-GI-NEXT: .byte 23 // 0x17 +;CHECK-GI-LABEL: .LCPI9_1: +;CHECK-GI: .byte 0 // 0x0 +;CHECK-GI-NEXT: .byte 4 // 0x4 +;CHECK-GI-NEXT: .byte 8 // 0x8 +;CHECK-GI-NEXT: .byte 12 // 0xc +;CHECK-GI-NEXT: .byte 16 // 0x10 +;CHECK-GI-NEXT: .byte 20 // 0x14 +;CHECK-GI-NEXT: .byte 24 // 0x18 +;CHECK-GI-NEXT: .byte 28 // 0x1c +;CHECK-GI-NEXT: .byte 255 // 0xff +;CHECK-GI-NEXT: .byte 255 // 0xff +;CHECK-GI-NEXT: .byte 255 // 0xff +;CHECK-GI-NEXT: .byte 255 // 0xff +;CHECK-GI-NEXT: .byte 255 // 0xff +;CHECK-GI-NEXT: .byte 255 // 0xff +;CHECK-GI-NEXT: .byte 255 // 0xff +;CHECK-GI-NEXT: .byte 255 // 0xff define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { -; CHECK-LABEL: shuffled_tbl2_to_tbl4: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: adrp x8, .LCPI9_0 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; 
CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0] -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: adrp x8, .LCPI9_0 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI9_0] +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI9_1 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI9_1] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: adrp x8, .LCPI9_0 +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 +; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI9_0] +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-GI-NEXT: ret %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> ret <16 x i8> %s } +; CHECK-GI-LABEL: .LCPI10_0: +; CHECK-GI: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 1 // 0x1 +; CHECK-GI-NEXT: .byte 2 // 0x2 +; CHECK-GI-NEXT: .byte 3 // 0x3 +; CHECK-GI-NEXT: .byte 4 // 0x4 +; CHECK-GI-NEXT: .byte 5 // 0x5 +; 
CHECK-GI-NEXT: .byte 6 // 0x6 +; CHECK-GI-NEXT: .byte 7 // 0x7 +; CHECK-GI-NEXT: .byte 16 // 0x10 +; CHECK-GI-NEXT: .byte 17 // 0x11 +; CHECK-GI-NEXT: .byte 18 // 0x12 +; CHECK-GI-NEXT: .byte 19 // 0x13 +; CHECK-GI-NEXT: .byte 20 // 0x14 +; CHECK-GI-NEXT: .byte 21 // 0x15 +; CHECK-GI-NEXT: .byte 22 // 0x16 +; CHECK-GI-NEXT: .byte 23 // 0x17 +; CHECK-GI-LABEL: .LCPI10_1: +; CHECK-GI: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 4 // 0x4 +; CHECK-GI-NEXT: .byte 8 // 0x8 +; CHECK-GI-NEXT: .byte 12 // 0xc +; CHECK-GI-NEXT: .byte 16 // 0x10 +; CHECK-GI-NEXT: .byte 20 // 0x14 +; CHECK-GI-NEXT: .byte 24 // 0x18 +; CHECK-GI-NEXT: .byte 28 // 0x1c +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff + define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { -; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask: -; CHECK: // %bb.0: -; CHECK-NEXT: fmov s4, w0 -; CHECK-NEXT: mov w8, #32 // =0x20 -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: mov.b v4[1], w0 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: mov.b v4[2], w0 -; CHECK-NEXT: mov.b v4[3], w0 -; CHECK-NEXT: mov.b v4[4], w0 -; CHECK-NEXT: mov.b v4[5], w0 -; CHECK-NEXT: mov.b v4[6], w0 -; CHECK-NEXT: mov.b v4[7], w0 -; CHECK-NEXT: mov.b v4[8], w8 -; CHECK-NEXT: mov w8, #36 // =0x24 -; CHECK-NEXT: mov.b v4[9], w8 -; CHECK-NEXT: mov w8, #40 // =0x28 -; CHECK-NEXT: mov.b v4[10], w8 -; CHECK-NEXT: mov w8, #44 // =0x2c -; CHECK-NEXT: mov.b v4[11], w8 -; CHECK-NEXT: mov w8, #48 // 
=0x30 -; CHECK-NEXT: mov.b v4[12], w8 -; CHECK-NEXT: mov w8, #52 // =0x34 -; CHECK-NEXT: mov.b v4[13], w8 -; CHECK-NEXT: mov w8, #56 // =0x38 -; CHECK-NEXT: mov.b v4[14], w8 -; CHECK-NEXT: mov w8, #60 // =0x3c -; CHECK-NEXT: mov.b v4[15], w8 -; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov s4, w0 +; CHECK-SD-NEXT: mov w8, #32 // =0x20 +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: mov.b v4[1], w0 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: mov.b v4[2], w0 +; CHECK-SD-NEXT: mov.b v4[3], w0 +; CHECK-SD-NEXT: mov.b v4[4], w0 +; CHECK-SD-NEXT: mov.b v4[5], w0 +; CHECK-SD-NEXT: mov.b v4[6], w0 +; CHECK-SD-NEXT: mov.b v4[7], w0 +; CHECK-SD-NEXT: mov.b v4[8], w8 +; CHECK-SD-NEXT: mov w8, #36 // =0x24 +; CHECK-SD-NEXT: mov.b v4[9], w8 +; CHECK-SD-NEXT: mov w8, #40 // =0x28 +; CHECK-SD-NEXT: mov.b v4[10], w8 +; CHECK-SD-NEXT: mov w8, #44 // =0x2c +; CHECK-SD-NEXT: mov.b v4[11], w8 +; CHECK-SD-NEXT: mov w8, #48 // =0x30 +; CHECK-SD-NEXT: mov.b v4[12], w8 +; CHECK-SD-NEXT: mov w8, #52 // =0x34 +; CHECK-SD-NEXT: mov.b v4[13], w8 +; CHECK-SD-NEXT: mov w8, #56 // =0x38 +; CHECK-SD-NEXT: mov.b v4[14], w8 +; CHECK-SD-NEXT: mov w8, #60 // =0x3c +; CHECK-SD-NEXT: mov.b v4[15], w8 +; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov s4, w0 +; CHECK-GI-NEXT: mov w8, #255 // =0xff +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: // kill: def $q1 
killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: mov.16b v5, v4 +; CHECK-GI-NEXT: mov.b v5[1], v4[0] +; CHECK-GI-NEXT: mov.b v5[2], v4[0] +; CHECK-GI-NEXT: mov.b v5[3], v4[0] +; CHECK-GI-NEXT: mov.b v5[4], v4[0] +; CHECK-GI-NEXT: mov.b v5[5], v4[0] +; CHECK-GI-NEXT: mov.b v5[6], v4[0] +; CHECK-GI-NEXT: mov.b v5[7], v4[0] +; CHECK-GI-NEXT: fmov s4, w8 +; CHECK-GI-NEXT: adrp x8, .LCPI10_1 +; CHECK-GI-NEXT: mov.b v5[8], v4[0] +; CHECK-GI-NEXT: mov.b v5[9], v4[0] +; CHECK-GI-NEXT: mov.b v5[10], v4[0] +; CHECK-GI-NEXT: mov.b v5[11], v4[0] +; CHECK-GI-NEXT: mov.b v5[12], v4[0] +; CHECK-GI-NEXT: mov.b v5[13], v4[0] +; CHECK-GI-NEXT: mov.b v5[14], v4[0] +; CHECK-GI-NEXT: mov.b v5[15], v4[0] +; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI10_1] +; CHECK-GI-NEXT: adrp x8, .LCPI10_0 +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v5 +; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI10_0] +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-GI-NEXT: ret %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1 %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2 @@ -211,40 +415,111 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x ret <16 x i8> %s } +; CHECK-GI-LABEL: .LCPI11_0: +; CHECK-GI: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 1 // 0x1 +; CHECK-GI-NEXT: .byte 2 // 0x2 +; CHECK-GI-NEXT: .byte 3 // 0x3 +; CHECK-GI-NEXT: .byte 4 // 0x4 +; CHECK-GI-NEXT: .byte 5 // 0x5 +; CHECK-GI-NEXT: .byte 6 // 0x6 +; CHECK-GI-NEXT: .byte 15 // 0xf +; CHECK-GI-NEXT: .byte 16 // 0x10 +; CHECK-GI-NEXT: .byte 17 // 0x11 +; CHECK-GI-NEXT: .byte 18 // 0x12 +; CHECK-GI-NEXT: .byte 19 // 0x13 +; CHECK-GI-NEXT: .byte 20 // 0x14 +; CHECK-GI-NEXT: .byte 21 // 0x15 +; CHECK-GI-NEXT: .byte 22 // 0x16 +; CHECK-GI-NEXT: .byte 31 // 0x1f +; CHECK-GI-LABEL: .LCPI11_1: +; CHECK-GI: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 4 // 
0x4 +; CHECK-GI-NEXT: .byte 8 // 0x8 +; CHECK-GI-NEXT: .byte 12 // 0xc +; CHECK-GI-NEXT: .byte 16 // 0x10 +; CHECK-GI-NEXT: .byte 20 // 0x14 +; CHECK-GI-NEXT: .byte 24 // 0x18 +; CHECK-GI-NEXT: .byte 28 // 0x1c +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff + define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { -; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 // =0x1 -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: mov.b v4[1], w8 -; CHECK-NEXT: mov.b v4[2], w8 -; CHECK-NEXT: mov.b v4[3], w8 -; CHECK-NEXT: mov.b v4[4], w8 -; CHECK-NEXT: mov.b v4[5], w8 -; CHECK-NEXT: mov.b v4[6], w8 -; CHECK-NEXT: mov w8, #32 // =0x20 -; CHECK-NEXT: mov.b v4[7], w0 -; CHECK-NEXT: mov.b v4[8], w8 -; CHECK-NEXT: mov w8, #36 // =0x24 -; CHECK-NEXT: mov.b v4[9], w8 -; CHECK-NEXT: mov w8, #40 // =0x28 -; CHECK-NEXT: mov.b v4[10], w8 -; CHECK-NEXT: mov w8, #44 // =0x2c -; CHECK-NEXT: mov.b v4[11], w8 -; CHECK-NEXT: mov w8, #48 // =0x30 -; CHECK-NEXT: mov.b v4[12], w8 -; CHECK-NEXT: mov w8, #52 // =0x34 -; CHECK-NEXT: mov.b v4[13], w8 -; CHECK-NEXT: mov w8, #56 // =0x38 -; CHECK-NEXT: mov.b v4[14], w8 -; CHECK-NEXT: mov w8, #31 // =0x1f -; CHECK-NEXT: mov.b v4[15], w8 -; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2: +; CHECK-SD: // 
%bb.0: +; CHECK-SD-NEXT: mov w8, #1 // =0x1 +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: fmov s4, w8 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: mov.b v4[1], w8 +; CHECK-SD-NEXT: mov.b v4[2], w8 +; CHECK-SD-NEXT: mov.b v4[3], w8 +; CHECK-SD-NEXT: mov.b v4[4], w8 +; CHECK-SD-NEXT: mov.b v4[5], w8 +; CHECK-SD-NEXT: mov.b v4[6], w8 +; CHECK-SD-NEXT: mov w8, #32 // =0x20 +; CHECK-SD-NEXT: mov.b v4[7], w0 +; CHECK-SD-NEXT: mov.b v4[8], w8 +; CHECK-SD-NEXT: mov w8, #36 // =0x24 +; CHECK-SD-NEXT: mov.b v4[9], w8 +; CHECK-SD-NEXT: mov w8, #40 // =0x28 +; CHECK-SD-NEXT: mov.b v4[10], w8 +; CHECK-SD-NEXT: mov w8, #44 // =0x2c +; CHECK-SD-NEXT: mov.b v4[11], w8 +; CHECK-SD-NEXT: mov w8, #48 // =0x30 +; CHECK-SD-NEXT: mov.b v4[12], w8 +; CHECK-SD-NEXT: mov w8, #52 // =0x34 +; CHECK-SD-NEXT: mov.b v4[13], w8 +; CHECK-SD-NEXT: mov w8, #56 // =0x38 +; CHECK-SD-NEXT: mov.b v4[14], w8 +; CHECK-SD-NEXT: mov w8, #31 // =0x1f +; CHECK-SD-NEXT: mov.b v4[15], w8 +; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: fmov s6, w0 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: fmov s4, w8 +; CHECK-GI-NEXT: mov w8, #255 // =0xff +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: mov.16b v5, v4 +; CHECK-GI-NEXT: mov.b v5[1], v4[0] +; CHECK-GI-NEXT: mov.b v5[2], v4[0] +; CHECK-GI-NEXT: mov.b v5[3], v4[0] +; CHECK-GI-NEXT: mov.b v5[4], v4[0] 
+; CHECK-GI-NEXT: mov.b v5[5], v4[0] +; CHECK-GI-NEXT: mov.b v5[6], v4[0] +; CHECK-GI-NEXT: mov.b v5[7], v4[0] +; CHECK-GI-NEXT: fmov s4, w8 +; CHECK-GI-NEXT: adrp x8, .LCPI11_1 +; CHECK-GI-NEXT: mov.b v5[8], v4[0] +; CHECK-GI-NEXT: mov.b v5[9], v4[0] +; CHECK-GI-NEXT: mov.b v5[10], v4[0] +; CHECK-GI-NEXT: mov.b v5[11], v4[0] +; CHECK-GI-NEXT: mov.b v5[12], v6[0] +; CHECK-GI-NEXT: mov.b v5[13], v6[0] +; CHECK-GI-NEXT: mov.b v5[14], v4[0] +; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI11_1] +; CHECK-GI-NEXT: adrp x8, .LCPI11_0 +; CHECK-GI-NEXT: mov.b v5[15], v6[0] +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v5 +; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI11_0] +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-GI-NEXT: ret %ins.0 = insertelement <16 x i8> poison, i8 1, i32 0 %ins.1 = insertelement <16 x i8> %ins.0, i8 1, i32 1 %ins.2 = insertelement <16 x i8> %ins.1, i8 1, i32 2 @@ -267,29 +542,116 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x ret <16 x i8> %s } +; CHECK-SD-LABEL: .LCPI12_0: +; CHECK-SD: .byte 0 // 0x0 +; CHECK-SD-NEXT: .byte 4 // 0x4 +; CHECK-SD-NEXT: .byte 8 // 0x8 +; CHECK-SD-NEXT: .byte 12 // 0xc +; CHECK-SD-NEXT: .byte 16 // 0x10 +; CHECK-SD-NEXT: .byte 20 // 0x14 +; CHECK-SD-NEXT: .byte 24 // 0x18 +; CHECK-SD-NEXT: .byte 28 // 0x1c +; CHECK-SD-NEXT: .byte 255 // 0xff +; CHECK-SD-NEXT: .byte 255 // 0xff +; CHECK-SD-NEXT: .byte 255 // 0xff +; CHECK-SD-NEXT: .byte 255 // 0xff +; CHECK-SD-NEXT: .byte 255 // 0xff +; CHECK-SD-NEXT: .byte 255 // 0xff +; CHECK-SD-NEXT: .byte 255 // 0xff +; CHECK-SD-NEXT: .byte 255 // 0xff + +; CHECK-GI-LABEL: .LCPI12_0: +; CHECK-GI: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 1 // 0x1 +; CHECK-GI-NEXT: .byte 2 // 0x2 +; CHECK-GI-NEXT: .byte 3 // 0x3 +; CHECK-GI-NEXT: .byte 4 // 0x4 +; CHECK-GI-NEXT: .byte 5 // 0x5 +; CHECK-GI-NEXT: .byte 6 // 0x6 +; CHECK-GI-NEXT: .byte 7 // 0x7 +; CHECK-GI-NEXT: .byte 16 // 0x10 +; CHECK-GI-NEXT: .byte 17 // 0x11 
+; CHECK-GI-NEXT: .byte 18 // 0x12 +; CHECK-GI-NEXT: .byte 19 // 0x13 +; CHECK-GI-NEXT: .byte 20 // 0x14 +; CHECK-GI-NEXT: .byte 21 // 0x15 +; CHECK-GI-NEXT: .byte 22 // 0x16 +; CHECK-GI-NEXT: .byte 23 // 0x17 +; CHECK-GI-LABEL: .LCPI12_1: +; CHECK-GI: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 4 // 0x4 +; CHECK-GI-NEXT: .byte 8 // 0x8 +; CHECK-GI-NEXT: .byte 12 // 0xc +; CHECK-GI-NEXT: .byte 16 // 0x10 +; CHECK-GI-NEXT: .byte 20 // 0x14 +; CHECK-GI-NEXT: .byte 24 // 0x18 +; CHECK-GI-NEXT: .byte 28 // 0x1c +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff + define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { -; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask: -; CHECK: // %bb.0: -; CHECK-NEXT: movi.2d v4, #0xffffffffffffffff -; CHECK-NEXT: adrp x8, .LCPI12_0 -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI12_0] -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl.16b v2, { v2, v3 }, v5 -; CHECK-NEXT: mov.b v4[0], w0 -; CHECK-NEXT: mov.b v4[1], w0 -; CHECK-NEXT: mov.b v4[2], w0 -; CHECK-NEXT: mov.b v4[3], w0 -; CHECK-NEXT: mov.b v4[4], w0 -; CHECK-NEXT: mov.b v4[5], w0 -; CHECK-NEXT: mov.b v4[6], w0 -; CHECK-NEXT: mov.b v4[7], w0 -; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v4 -; CHECK-NEXT: mov.d v2[1], v0[0] -; CHECK-NEXT: mov.16b v0, v2 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi.2d v4, #0xffffffffffffffff +; CHECK-SD-NEXT: adrp x8, .LCPI12_0 +; 
CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ldr q5, [x8, :lo12:.LCPI12_0] +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: tbl.16b v2, { v2, v3 }, v5 +; CHECK-SD-NEXT: mov.b v4[0], w0 +; CHECK-SD-NEXT: mov.b v4[1], w0 +; CHECK-SD-NEXT: mov.b v4[2], w0 +; CHECK-SD-NEXT: mov.b v4[3], w0 +; CHECK-SD-NEXT: mov.b v4[4], w0 +; CHECK-SD-NEXT: mov.b v4[5], w0 +; CHECK-SD-NEXT: mov.b v4[6], w0 +; CHECK-SD-NEXT: mov.b v4[7], w0 +; CHECK-SD-NEXT: tbl.16b v0, { v0, v1 }, v4 +; CHECK-SD-NEXT: mov.d v2[1], v0[0] +; CHECK-SD-NEXT: mov.16b v0, v2 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov s4, w0 +; CHECK-GI-NEXT: mov w8, #255 // =0xff +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: mov.16b v5, v4 +; CHECK-GI-NEXT: mov.b v5[1], v4[0] +; CHECK-GI-NEXT: mov.b v5[2], v4[0] +; CHECK-GI-NEXT: mov.b v5[3], v4[0] +; CHECK-GI-NEXT: mov.b v5[4], v4[0] +; CHECK-GI-NEXT: mov.b v5[5], v4[0] +; CHECK-GI-NEXT: mov.b v5[6], v4[0] +; CHECK-GI-NEXT: mov.b v5[7], v4[0] +; CHECK-GI-NEXT: fmov s4, w8 +; CHECK-GI-NEXT: adrp x8, .LCPI12_1 +; CHECK-GI-NEXT: mov.b v5[8], v4[0] +; CHECK-GI-NEXT: mov.b v5[9], v4[0] +; CHECK-GI-NEXT: mov.b v5[10], v4[0] +; CHECK-GI-NEXT: mov.b v5[11], v4[0] +; CHECK-GI-NEXT: mov.b v5[12], v4[0] +; CHECK-GI-NEXT: mov.b v5[13], v4[0] +; CHECK-GI-NEXT: mov.b v5[14], v4[0] +; CHECK-GI-NEXT: mov.b v5[15], v4[0] +; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI12_1] +; CHECK-GI-NEXT: adrp x8, .LCPI12_0 +; CHECK-GI-NEXT: tbl.16b 
v2, { v2, v3 }, v4 +; CHECK-GI-NEXT: tbl.16b v3, { v0, v1 }, v5 +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI12_0] +; CHECK-GI-NEXT: tbl.16b v0, { v2, v3 }, v0 +; CHECK-GI-NEXT: ret %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1 %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2 @@ -312,29 +674,133 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x ret <16 x i8> %s } +; CHECK-SD-LABEL: .LCPI13_0: +; CHECK-SD: .byte 0 // 0x0 +; CHECK-SD-NEXT: .byte 4 // 0x4 +; CHECK-SD-NEXT: .byte 8 // 0x8 +; CHECK-SD-NEXT: .byte 12 // 0xc +; CHECK-SD-NEXT: .byte 16 // 0x10 +; CHECK-SD-NEXT: .byte 20 // 0x14 +; CHECK-SD-NEXT: .byte 24 // 0x18 +; CHECK-SD-NEXT: .byte 28 // 0x1c +; CHECK-SD-NEXT: .byte 255 // 0xff +; CHECK-SD-NEXT: .byte 255 // 0xff +; CHECK-SD-NEXT: .byte 255 // 0xff +; CHECK-SD-NEXT: .byte 255 // 0xff +; CHECK-SD-NEXT: .byte 255 // 0xff +; CHECK-SD-NEXT: .byte 255 // 0xff +; CHECK-SD-NEXT: .byte 255 // 0xff +; CHECK-SD-NEXT: .byte 255 // 0xff +; CHECK-SD-LABEL: .LCPI13_1: +; CHECK-SD: .byte 0 // 0x0 +; CHECK-SD-NEXT: .byte 1 // 0x1 +; CHECK-SD-NEXT: .byte 2 // 0x2 +; CHECK-SD-NEXT: .byte 3 // 0x3 +; CHECK-SD-NEXT: .byte 4 // 0x4 +; CHECK-SD-NEXT: .byte 5 // 0x5 +; CHECK-SD-NEXT: .byte 6 // 0x6 +; CHECK-SD-NEXT: .byte 7 // 0x7 +; CHECK-SD-NEXT: .byte 16 // 0x10 +; CHECK-SD-NEXT: .byte 17 // 0x11 +; CHECK-SD-NEXT: .byte 18 // 0x12 +; CHECK-SD-NEXT: .byte 19 // 0x13 +; CHECK-SD-NEXT: .byte 20 // 0x14 +; CHECK-SD-NEXT: .byte 21 // 0x15 +; CHECK-SD-NEXT: .byte 30 // 0x1e +; CHECK-SD-NEXT: .byte 31 // 0x1f + +; CHECK-GI-LABEL: .LCPI13_0: +; CHECK-GI: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 1 // 0x1 +; CHECK-GI-NEXT: .byte 2 // 0x2 +; CHECK-GI-NEXT: .byte 3 // 0x3 +; CHECK-GI-NEXT: .byte 4 // 0x4 +; CHECK-GI-NEXT: .byte 5 // 0x5 +; CHECK-GI-NEXT: .byte 6 // 0x6 +; CHECK-GI-NEXT: .byte 7 // 0x7 +; CHECK-GI-NEXT: .byte 16 // 0x10 +; CHECK-GI-NEXT: .byte 17 // 0x11 +; CHECK-GI-NEXT: 
.byte 18 // 0x12 +; CHECK-GI-NEXT: .byte 19 // 0x13 +; CHECK-GI-NEXT: .byte 20 // 0x14 +; CHECK-GI-NEXT: .byte 21 // 0x15 +; CHECK-GI-NEXT: .byte 30 // 0x1e +; CHECK-GI-NEXT: .byte 31 // 0x1f +; CHECK-GI-LABEL: .LCPI13_1: +; CHECK-GI: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 4 // 0x4 +; CHECK-GI-NEXT: .byte 8 // 0x8 +; CHECK-GI-NEXT: .byte 12 // 0xc +; CHECK-GI-NEXT: .byte 16 // 0x10 +; CHECK-GI-NEXT: .byte 20 // 0x14 +; CHECK-GI-NEXT: .byte 24 // 0x18 +; CHECK-GI-NEXT: .byte 28 // 0x1c +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff + define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { -; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask2: -; CHECK: // %bb.0: -; CHECK-NEXT: dup.16b v4, w0 -; CHECK-NEXT: mov w8, #255 // =0xff -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: mov.b v4[8], w8 -; CHECK-NEXT: mov.b v4[9], w8 -; CHECK-NEXT: mov.b v4[10], w8 -; CHECK-NEXT: mov.b v4[11], w8 -; CHECK-NEXT: mov.b v4[12], w8 -; CHECK-NEXT: mov.b v4[13], w8 -; CHECK-NEXT: adrp x8, .LCPI13_0 -; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI13_0] -; CHECK-NEXT: adrp x8, .LCPI13_1 -; CHECK-NEXT: tbl.16b v2, { v2, v3 }, v5 -; CHECK-NEXT: tbl.16b v3, { v0, v1 }, v4 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI13_1] -; CHECK-NEXT: tbl.16b v0, { v2, v3 }, v0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask2: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: dup.16b v4, w0 +; CHECK-SD-NEXT: mov w8, #255 // =0xff +; CHECK-SD-NEXT: 
// kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: mov.b v4[8], w8 +; CHECK-SD-NEXT: mov.b v4[9], w8 +; CHECK-SD-NEXT: mov.b v4[10], w8 +; CHECK-SD-NEXT: mov.b v4[11], w8 +; CHECK-SD-NEXT: mov.b v4[12], w8 +; CHECK-SD-NEXT: mov.b v4[13], w8 +; CHECK-SD-NEXT: adrp x8, .LCPI13_0 +; CHECK-SD-NEXT: ldr q5, [x8, :lo12:.LCPI13_0] +; CHECK-SD-NEXT: adrp x8, .LCPI13_1 +; CHECK-SD-NEXT: tbl.16b v2, { v2, v3 }, v5 +; CHECK-SD-NEXT: tbl.16b v3, { v0, v1 }, v4 +; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI13_1] +; CHECK-SD-NEXT: tbl.16b v0, { v2, v3 }, v0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov s4, w0 +; CHECK-GI-NEXT: mov w8, #255 // =0xff +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: fmov s6, w8 +; CHECK-GI-NEXT: adrp x8, .LCPI13_1 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: mov.16b v5, v4 +; CHECK-GI-NEXT: mov.b v5[1], v4[0] +; CHECK-GI-NEXT: mov.b v5[2], v4[0] +; CHECK-GI-NEXT: mov.b v5[3], v4[0] +; CHECK-GI-NEXT: mov.b v5[4], v4[0] +; CHECK-GI-NEXT: mov.b v5[5], v4[0] +; CHECK-GI-NEXT: mov.b v5[6], v4[0] +; CHECK-GI-NEXT: mov.b v5[7], v4[0] +; CHECK-GI-NEXT: mov.b v5[8], v6[0] +; CHECK-GI-NEXT: mov.b v5[9], v6[0] +; CHECK-GI-NEXT: mov.b v5[10], v6[0] +; CHECK-GI-NEXT: mov.b v5[11], v6[0] +; CHECK-GI-NEXT: mov.b v5[12], v6[0] +; CHECK-GI-NEXT: mov.b v5[13], v6[0] +; CHECK-GI-NEXT: mov.b v5[14], v4[0] +; CHECK-GI-NEXT: mov.b v5[15], v4[0] +; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI13_1] +; CHECK-GI-NEXT: adrp x8, .LCPI13_0 +; 
CHECK-GI-NEXT: tbl.16b v2, { v2, v3 }, v4 +; CHECK-GI-NEXT: tbl.16b v3, { v0, v1 }, v5 +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0] +; CHECK-GI-NEXT: tbl.16b v0, { v2, v3 }, v0 +; CHECK-GI-NEXT: ret %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1 %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2 @@ -357,106 +823,293 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 ret <16 x i8> %s } +; CHECK-SD-LABEL: .LCPI14_0: +; CHECK-SD: .byte 0 // 0x0 +; CHECK-SD-NEXT: .byte 4 // 0x4 +; CHECK-SD-NEXT: .byte 52 // 0x34 +; CHECK-SD-NEXT: .byte 12 // 0xc +; CHECK-SD-NEXT: .byte 16 // 0x10 +; CHECK-SD-NEXT: .byte 20 // 0x14 +; CHECK-SD-NEXT: .byte 24 // 0x18 +; CHECK-SD-NEXT: .byte 28 // 0x1c +; CHECK-SD-NEXT: .byte 32 // 0x20 +; CHECK-SD-NEXT: .byte 36 // 0x24 +; CHECK-SD-NEXT: .byte 40 // 0x28 +; CHECK-SD-NEXT: .byte 44 // 0x2c +; CHECK-SD-NEXT: .byte 48 // 0x30 +; CHECK-SD-NEXT: .byte 52 // 0x34 +; CHECK-SD-NEXT: .byte 56 // 0x38 +; CHECK-SD-NEXT: .byte 60 // 0x3c -; CHECK-LABEL: .LCPI14_0: -; CHECK-NEXT: .byte 0 // 0x0 -; CHECK-NEXT: .byte 4 // 0x4 -; CHECK-NEXT: .byte 52 // 0x34 -; CHECK-NEXT: .byte 12 // 0xc -; CHECK-NEXT: .byte 16 // 0x10 -; CHECK-NEXT: .byte 20 // 0x14 -; CHECK-NEXT: .byte 24 // 0x18 -; CHECK-NEXT: .byte 28 // 0x1c -; CHECK-NEXT: .byte 32 // 0x20 -; CHECK-NEXT: .byte 36 // 0x24 -; CHECK-NEXT: .byte 40 // 0x28 -; CHECK-NEXT: .byte 44 // 0x2c -; CHECK-NEXT: .byte 48 // 0x30 -; CHECK-NEXT: .byte 52 // 0x34 -; CHECK-NEXT: .byte 56 // 0x38 -; CHECK-NEXT: .byte 60 // 0x3c +; CHECK-GI-LABEL: .LCPI14_0: +; CHECK-GI: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 1 // 0x1 +; CHECK-GI-NEXT: .byte 21 // 0x15 +; CHECK-GI-NEXT: .byte 3 // 0x3 +; CHECK-GI-NEXT: .byte 4 // 0x4 +; CHECK-GI-NEXT: .byte 5 // 0x5 +; CHECK-GI-NEXT: .byte 6 // 0x6 +; CHECK-GI-NEXT: .byte 7 // 0x7 +; CHECK-GI-NEXT: .byte 16 // 0x10 +; CHECK-GI-NEXT: .byte 17 // 0x11 +; CHECK-GI-NEXT: .byte 18 // 0x12 +; 
CHECK-GI-NEXT: .byte 19 // 0x13 +; CHECK-GI-NEXT: .byte 20 // 0x14 +; CHECK-GI-NEXT: .byte 21 // 0x15 +; CHECK-GI-NEXT: .byte 22 // 0x16 +; CHECK-GI-NEXT: .byte 23 // 0x17 +; CHECK-GI-LABEL: .LCPI14_1: +; CHECK-GI: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 4 // 0x4 +; CHECK-GI-NEXT: .byte 8 // 0x8 +; CHECK-GI-NEXT: .byte 12 // 0xc +; CHECK-GI-NEXT: .byte 16 // 0x10 +; CHECK-GI-NEXT: .byte 20 // 0x14 +; CHECK-GI-NEXT: .byte 24 // 0x18 +; CHECK-GI-NEXT: .byte 28 // 0x1c +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { -; CHECK-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: adrp x8, .LCPI14_0 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI14_0] -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: adrp x8, .LCPI14_0 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI14_0] +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 +; CHECK-SD-NEXT: ret +; +; 
CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI14_1 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI14_1] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: adrp x8, .LCPI14_0 +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 +; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI14_0] +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-GI-NEXT: ret %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> ret <16 x i8> %s } -; CHECK-LABEL: .LCPI15_0: -; CHECK-NEXT: .byte 0 // 0x0 -; CHECK-NEXT: .byte 4 // 0x4 -; CHECK-NEXT: .byte 52 // 0x34 -; CHECK-NEXT: .byte 12 // 0xc -; CHECK-NEXT: .byte 16 // 0x10 -; CHECK-NEXT: .byte 20 // 0x14 -; CHECK-NEXT: .byte 24 // 0x18 -; CHECK-NEXT: .byte 28 // 0x1c -; CHECK-NEXT: .byte 32 // 0x20 -; CHECK-NEXT: .byte 36 // 0x24 -; CHECK-NEXT: .byte 40 // 0x28 -; CHECK-NEXT: .byte 44 // 0x2c -; CHECK-NEXT: .byte 48 // 0x30 -; CHECK-NEXT: .byte 52 // 0x34 -; CHECK-NEXT: .byte 56 // 0x38 -; CHECK-NEXT: .byte 60 // 0x3c +; CHECK-SD-LABEL: .LCPI15_0: +; CHECK-SD: .byte 0 // 0x0 +; CHECK-SD-NEXT: .byte 4 // 0x4 +; CHECK-SD-NEXT: .byte 52 // 0x34 +; CHECK-SD-NEXT: .byte 12 // 0xc +; CHECK-SD-NEXT: .byte 16 // 0x10 +; CHECK-SD-NEXT: .byte 20 // 0x14 +; CHECK-SD-NEXT: .byte 24 // 0x18 +; CHECK-SD-NEXT: .byte 28 // 0x1c +; CHECK-SD-NEXT: .byte 32 // 0x20 +; CHECK-SD-NEXT: .byte 36 // 0x24 +; CHECK-SD-NEXT: .byte 40 // 0x28 +; CHECK-SD-NEXT: .byte 44 // 0x2c +; CHECK-SD-NEXT: .byte 48 // 0x30 +; CHECK-SD-NEXT: .byte 52 // 0x34 +; 
CHECK-SD-NEXT: .byte 56 // 0x38 +; CHECK-SD-NEXT: .byte 60 // 0x3c + +; CHECK-GI-LABEL: .LCPI15_0: +; CHECK-GI: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 1 // 0x1 +; CHECK-GI-NEXT: .byte 21 // 0x15 +; CHECK-GI-NEXT: .byte 3 // 0x3 +; CHECK-GI-NEXT: .byte 4 // 0x4 +; CHECK-GI-NEXT: .byte 5 // 0x5 +; CHECK-GI-NEXT: .byte 6 // 0x6 +; CHECK-GI-NEXT: .byte 7 // 0x7 +; CHECK-GI-NEXT: .byte 16 // 0x10 +; CHECK-GI-NEXT: .byte 17 // 0x11 +; CHECK-GI-NEXT: .byte 18 // 0x12 +; CHECK-GI-NEXT: .byte 19 // 0x13 +; CHECK-GI-NEXT: .byte 20 // 0x14 +; CHECK-GI-NEXT: .byte 21 // 0x15 +; CHECK-GI-NEXT: .byte 22 // 0x16 +; CHECK-GI-NEXT: .byte 23 // 0x17 +; CHECK-GI-LABEL: .LCPI15_1: +; CHECK-GI: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 4 // 0x4 +; CHECK-GI-NEXT: .byte 8 // 0x8 +; CHECK-GI-NEXT: .byte 12 // 0xc +; CHECK-GI-NEXT: .byte 16 // 0x10 +; CHECK-GI-NEXT: .byte 20 // 0x14 +; CHECK-GI-NEXT: .byte 24 // 0x18 +; CHECK-GI-NEXT: .byte 28 // 0x1c +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-LABEL: .LCPI15_2: +; CHECK-GI: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 4 // 0x4 +; CHECK-GI-NEXT: .byte 8 // 0x8 +; CHECK-GI-NEXT: .byte 12 // 0xc +; CHECK-GI-NEXT: .byte 16 // 0x10 +; CHECK-GI-NEXT: .byte 20 // 0x14 +; CHECK-GI-NEXT: .byte 24 // 0x18 +; CHECK-GI-NEXT: .byte 28 // 0x1c +; CHECK-GI-NEXT: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { -; CHECK-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1: -; CHECK: // %bb.0: 
-; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: adrp x8, .LCPI15_0 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI15_0] -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: adrp x8, .LCPI15_0 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI15_0] +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI15_2 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI15_2] +; CHECK-GI-NEXT: adrp x8, .LCPI15_1 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI15_1] +; CHECK-GI-NEXT: adrp x8, .LCPI15_0 +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 +; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v5 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI15_0] +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-GI-NEXT: ret %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x 
i8> ) %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> ret <16 x i8> %s } -; CHECK-LABEL: .LCPI16_0: -; CHECK-NEXT: .byte 0 // 0x0 -; CHECK-NEXT: .byte 4 // 0x4 -; CHECK-NEXT: .byte 52 // 0x34 -; CHECK-NEXT: .byte 12 // 0xc -; CHECK-NEXT: .byte 16 // 0x10 -; CHECK-NEXT: .byte 20 // 0x14 -; CHECK-NEXT: .byte 24 // 0x18 -; CHECK-NEXT: .byte 28 // 0x1c -; CHECK-NEXT: .byte 32 // 0x20 -; CHECK-NEXT: .byte 36 // 0x24 -; CHECK-NEXT: .byte 40 // 0x28 -; CHECK-NEXT: .byte 44 // 0x2c -; CHECK-NEXT: .byte 48 // 0x30 -; CHECK-NEXT: .byte 52 // 0x34 -; CHECK-NEXT: .byte 56 // 0x38 -; CHECK-NEXT: .byte 60 // 0x3c +; CHECK-SD-LABEL: .LCPI16_0: +; CHECK-SD: .byte 0 // 0x0 +; CHECK-SD-NEXT: .byte 4 // 0x4 +; CHECK-SD-NEXT: .byte 52 // 0x34 +; CHECK-SD-NEXT: .byte 12 // 0xc +; CHECK-SD-NEXT: .byte 16 // 0x10 +; CHECK-SD-NEXT: .byte 20 // 0x14 +; CHECK-SD-NEXT: .byte 24 // 0x18 +; CHECK-SD-NEXT: .byte 28 // 0x1c +; CHECK-SD-NEXT: .byte 32 // 0x20 +; CHECK-SD-NEXT: .byte 36 // 0x24 +; CHECK-SD-NEXT: .byte 40 // 0x28 +; CHECK-SD-NEXT: .byte 44 // 0x2c +; CHECK-SD-NEXT: .byte 48 // 0x30 +; CHECK-SD-NEXT: .byte 52 // 0x34 +; CHECK-SD-NEXT: .byte 56 // 0x38 +; CHECK-SD-NEXT: .byte 60 // 0x3c + +; CHECK-GI-LABEL: .LCPI16_0: +; CHECK-GI: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 1 // 0x1 +; CHECK-GI-NEXT: .byte 21 // 0x15 +; CHECK-GI-NEXT: .byte 3 // 0x3 +; CHECK-GI-NEXT: .byte 4 // 0x4 +; CHECK-GI-NEXT: .byte 5 // 0x5 +; CHECK-GI-NEXT: .byte 6 // 0x6 +; CHECK-GI-NEXT: .byte 7 // 0x7 +; CHECK-GI-NEXT: .byte 16 // 0x10 +; CHECK-GI-NEXT: .byte 17 // 0x11 +; CHECK-GI-NEXT: .byte 18 // 0x12 +; CHECK-GI-NEXT: .byte 19 // 0x13 +; CHECK-GI-NEXT: .byte 20 // 0x14 +; CHECK-GI-NEXT: .byte 21 // 0x15 +; CHECK-GI-NEXT: .byte 22 // 0x16 +; CHECK-GI-NEXT: .byte 23 // 0x17 +; CHECK-GI-LABEL: .LCPI16_1: +; CHECK-GI: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 4 // 0x4 +; CHECK-GI-NEXT: .byte 8 // 0x8 +; CHECK-GI-NEXT: .byte 12 // 0xc +; CHECK-GI-NEXT: .byte 16 // 0x10 +; CHECK-GI-NEXT: .byte 20 // 0x14 
+; CHECK-GI-NEXT: .byte 24 // 0x18 +; CHECK-GI-NEXT: .byte 28 // 0x1c +; CHECK-GI-NEXT: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-LABEL: .LCPI16_2: +; CHECK-GI: .byte 0 // 0x0 +; CHECK-GI-NEXT: .byte 4 // 0x4 +; CHECK-GI-NEXT: .byte 8 // 0x8 +; CHECK-GI-NEXT: .byte 12 // 0xc +; CHECK-GI-NEXT: .byte 16 // 0x10 +; CHECK-GI-NEXT: .byte 20 // 0x14 +; CHECK-GI-NEXT: .byte 24 // 0x18 +; CHECK-GI-NEXT: .byte 28 // 0x1c +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff +; CHECK-GI-NEXT: .byte 255 // 0xff define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { -; CHECK-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: adrp x8, .LCPI16_0 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI16_0] -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: adrp x8, .LCPI16_0 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_0] +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 
def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI16_2 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI16_2] +; CHECK-GI-NEXT: adrp x8, .LCPI16_1 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI16_1] +; CHECK-GI-NEXT: adrp x8, .LCPI16_0 +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 +; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v5 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI16_0] +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-GI-NEXT: ret %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> @@ -491,73 +1144,121 @@ define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind { } define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { -; CHECK-LABEL: tbx2_8b: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 -; CHECK-NEXT: tbx.8b v0, { v1, v2 }, v3 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbx2_8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 +; CHECK-SD-NEXT: tbx.8b v0, { v1, v2 }, v3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbx2_8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 
def $q1_q2 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 +; CHECK-GI-NEXT: tbx.8b v0, { v1, v2 }, v3 +; CHECK-GI-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) ret <8 x i8> %tmp3 } define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { -; CHECK-LABEL: tbx2_16b: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 -; CHECK-NEXT: tbx.16b v0, { v1, v2 }, v3 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbx2_16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 +; CHECK-SD-NEXT: tbx.16b v0, { v1, v2 }, v3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbx2_16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 +; CHECK-GI-NEXT: tbx.16b v0, { v1, v2 }, v3 +; CHECK-GI-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) ret <16 x i8> %tmp3 } define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { -; CHECK-LABEL: tbx3_8b: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-NEXT: tbx.8b v0, { v1, v2, v3 }, v4 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbx3_8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-SD-NEXT: tbx.8b v0, { 
v1, v2, v3 }, v4 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbx3_8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3 }, v4 +; CHECK-GI-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) ret <8 x i8> %tmp3 } define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { -; CHECK-LABEL: tbx3_16b: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-NEXT: tbx.16b v0, { v1, v2, v3 }, v4 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbx3_16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3 }, v4 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbx3_16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3 }, v4 +; CHECK-GI-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) ret <16 x i8> %tmp3 } define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) { -; CHECK-LABEL: tbx4_8b: -; CHECK: // %bb.0: -; CHECK-NEXT: // 
kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbx4_8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-SD-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbx4_8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5 +; CHECK-GI-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) ret <8 x i8> %tmp3 } define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) { -; CHECK-LABEL: tbx4_16b: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5 -; CHECK-NEXT: ret +; 
CHECK-SD-LABEL: tbx4_16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbx4_16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5 +; CHECK-GI-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) ret <16 x i8> %tmp3 } diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll index f7d31a214563b..178c229d04e47 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -289,26 +289,27 @@ define i32 @uabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) { ; ; CHECK-GI-LABEL: uabd16b_rdx_i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ushll.8h v2, v0, #0 -; CHECK-GI-NEXT: ushll.8h v3, v1, #0 -; CHECK-GI-NEXT: ushll2.8h v0, v0, #0 -; CHECK-GI-NEXT: ushll2.8h v1, v1, #0 -; CHECK-GI-NEXT: usubl.4s v4, v2, v3 -; CHECK-GI-NEXT: usubl2.4s v2, v2, v3 -; CHECK-GI-NEXT: usubl.4s v3, v0, v1 -; CHECK-GI-NEXT: usubl2.4s v0, v0, v1 -; CHECK-GI-NEXT: cmlt.4s v1, v4, #0 -; CHECK-GI-NEXT: cmlt.4s v5, v2, #0 -; CHECK-GI-NEXT: neg.4s v16, v4 -; CHECK-GI-NEXT: cmlt.4s v6, v3, #0 -; CHECK-GI-NEXT: cmlt.4s v7, v0, #0 -; CHECK-GI-NEXT: neg.4s v17, v2 -; CHECK-GI-NEXT: neg.4s v18, v3 
-; CHECK-GI-NEXT: neg.4s v19, v0 -; CHECK-GI-NEXT: bsl.16b v1, v16, v4 -; CHECK-GI-NEXT: bit.16b v2, v17, v5 -; CHECK-GI-NEXT: bit.16b v3, v18, v6 -; CHECK-GI-NEXT: bit.16b v0, v19, v7 +; CHECK-GI-NEXT: usubl.8h v3, v0, v1 +; CHECK-GI-NEXT: movi.2d v2, #0000000000000000 +; CHECK-GI-NEXT: usubl2.8h v0, v0, v1 +; CHECK-GI-NEXT: sshll.4s v1, v3, #0 +; CHECK-GI-NEXT: sshll2.4s v4, v3, #0 +; CHECK-GI-NEXT: sshll.4s v5, v0, #0 +; CHECK-GI-NEXT: sshll2.4s v6, v0, #0 +; CHECK-GI-NEXT: ssubw2.4s v3, v2, v3 +; CHECK-GI-NEXT: ssubw2.4s v0, v2, v0 +; CHECK-GI-NEXT: cmlt.4s v2, v1, #0 +; CHECK-GI-NEXT: cmlt.4s v7, v4, #0 +; CHECK-GI-NEXT: neg.4s v16, v1 +; CHECK-GI-NEXT: cmlt.4s v17, v5, #0 +; CHECK-GI-NEXT: cmlt.4s v18, v6, #0 +; CHECK-GI-NEXT: neg.4s v19, v5 +; CHECK-GI-NEXT: bit.16b v1, v16, v2 +; CHECK-GI-NEXT: mov.16b v2, v7 +; CHECK-GI-NEXT: bif.16b v0, v6, v18 +; CHECK-GI-NEXT: bsl.16b v2, v3, v4 +; CHECK-GI-NEXT: mov.16b v3, v17 +; CHECK-GI-NEXT: bsl.16b v3, v19, v5 ; CHECK-GI-NEXT: add.4s v1, v1, v2 ; CHECK-GI-NEXT: add.4s v0, v3, v0 ; CHECK-GI-NEXT: add.4s v0, v1, v0 @@ -336,26 +337,27 @@ define i32 @sabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) { ; ; CHECK-GI-LABEL: sabd16b_rdx_i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sshll.8h v2, v0, #0 -; CHECK-GI-NEXT: sshll.8h v3, v1, #0 -; CHECK-GI-NEXT: sshll2.8h v0, v0, #0 -; CHECK-GI-NEXT: sshll2.8h v1, v1, #0 -; CHECK-GI-NEXT: ssubl.4s v4, v2, v3 -; CHECK-GI-NEXT: ssubl2.4s v2, v2, v3 -; CHECK-GI-NEXT: ssubl.4s v3, v0, v1 -; CHECK-GI-NEXT: ssubl2.4s v0, v0, v1 -; CHECK-GI-NEXT: cmlt.4s v1, v4, #0 -; CHECK-GI-NEXT: cmlt.4s v5, v2, #0 -; CHECK-GI-NEXT: neg.4s v16, v4 -; CHECK-GI-NEXT: cmlt.4s v6, v3, #0 -; CHECK-GI-NEXT: cmlt.4s v7, v0, #0 -; CHECK-GI-NEXT: neg.4s v17, v2 -; CHECK-GI-NEXT: neg.4s v18, v3 -; CHECK-GI-NEXT: neg.4s v19, v0 -; CHECK-GI-NEXT: bsl.16b v1, v16, v4 -; CHECK-GI-NEXT: bit.16b v2, v17, v5 -; CHECK-GI-NEXT: bit.16b v3, v18, v6 -; CHECK-GI-NEXT: bit.16b v0, v19, v7 +; CHECK-GI-NEXT: ssubl.8h v3, v0, v1 
+; CHECK-GI-NEXT: movi.2d v2, #0000000000000000 +; CHECK-GI-NEXT: ssubl2.8h v0, v0, v1 +; CHECK-GI-NEXT: sshll.4s v1, v3, #0 +; CHECK-GI-NEXT: sshll2.4s v4, v3, #0 +; CHECK-GI-NEXT: sshll.4s v5, v0, #0 +; CHECK-GI-NEXT: sshll2.4s v6, v0, #0 +; CHECK-GI-NEXT: ssubw2.4s v3, v2, v3 +; CHECK-GI-NEXT: ssubw2.4s v0, v2, v0 +; CHECK-GI-NEXT: cmlt.4s v2, v1, #0 +; CHECK-GI-NEXT: cmlt.4s v7, v4, #0 +; CHECK-GI-NEXT: neg.4s v16, v1 +; CHECK-GI-NEXT: cmlt.4s v17, v5, #0 +; CHECK-GI-NEXT: cmlt.4s v18, v6, #0 +; CHECK-GI-NEXT: neg.4s v19, v5 +; CHECK-GI-NEXT: bit.16b v1, v16, v2 +; CHECK-GI-NEXT: mov.16b v2, v7 +; CHECK-GI-NEXT: bif.16b v0, v6, v18 +; CHECK-GI-NEXT: bsl.16b v2, v3, v4 +; CHECK-GI-NEXT: mov.16b v3, v17 +; CHECK-GI-NEXT: bsl.16b v3, v19, v5 ; CHECK-GI-NEXT: add.4s v1, v1, v2 ; CHECK-GI-NEXT: add.4s v0, v3, v0 ; CHECK-GI-NEXT: add.4s v0, v1, v0 diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll index 5f293e5c7ea34..66fea3535b1ec 100644 --- a/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll @@ -55,15 +55,15 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; CHECK-LABEL: atomicrmw_uinc_wrap_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: .LBB3_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr x0, [x8] -; CHECK-NEXT: cmp x0, x1 -; CHECK-NEXT: csinc x9, xzr, x0, hs -; CHECK-NEXT: stlxr w10, x9, [x8] +; CHECK-NEXT: ldaxr x8, [x0] +; CHECK-NEXT: cmp x8, x1 +; CHECK-NEXT: csinc x9, xzr, x8, hs +; CHECK-NEXT: stlxr w10, x9, [x0] ; CHECK-NEXT: cbnz w10, .LBB3_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov x0, x8 ; CHECK-NEXT: ret %result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst ret i64 %result diff --git a/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll 
b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll index 83c7f73800af1..dfe0e83649e20 100644 --- a/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll +++ b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll @@ -8,57 +8,57 @@ declare void @llvm.masked.scatter.nxv16i8.nxv16p0(, This Inner Loop Header: Depth=1 -; CHECK-NEXT: fmov d17, xzr -; CHECK-NEXT: cmpeq p2.d, p0/z, z17.d, #0 -; CHECK-NEXT: uzp1 p2.s, p2.s, p0.s -; CHECK-NEXT: uzp1 p2.h, p2.h, p0.h -; CHECK-NEXT: uzp1 p2.b, p2.b, p0.b -; CHECK-NEXT: mov z17.b, p2/z, #1 // =0x1 -; CHECK-NEXT: fmov w8, s17 -; CHECK-NEXT: sbfx x8, x8, #0, #1 -; CHECK-NEXT: whilelo p2.b, xzr, x8 -; CHECK-NEXT: not p2.b, p1/z, p2.b -; CHECK-NEXT: punpklo p3.h, p2.b -; CHECK-NEXT: punpkhi p2.h, p2.b -; CHECK-NEXT: punpklo p4.h, p3.b -; CHECK-NEXT: punpkhi p3.h, p3.b -; CHECK-NEXT: punpklo p5.h, p4.b -; CHECK-NEXT: punpkhi p4.h, p4.b -; CHECK-NEXT: st1b { z0.d }, p5, [z16.d] -; CHECK-NEXT: st1b { z1.d }, p4, [z16.d] -; CHECK-NEXT: punpklo p4.h, p3.b +; CHECK-NEXT: punpklo p0.h, p2.b +; CHECK-NEXT: punpkhi p1.h, p2.b +; CHECK-NEXT: punpklo p2.h, p3.b ; CHECK-NEXT: punpkhi p3.h, p3.b -; CHECK-NEXT: st1b { z2.d }, p4, [z16.d] +; CHECK-NEXT: punpklo p4.h, p5.b +; CHECK-NEXT: punpkhi p5.h, p5.b +; CHECK-NEXT: punpklo p6.h, p7.b +; CHECK-NEXT: punpkhi p7.h, p7.b +; CHECK-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: st1b { z0.d }, p0, [z16.d] +; CHECK-NEXT: st1b { z1.d }, p1, [z16.d] +; CHECK-NEXT: st1b { z2.d }, p2, [z16.d] ; CHECK-NEXT: st1b { z3.d }, p3, [z16.d] -; CHECK-NEXT: punpklo p3.h, p2.b -; CHECK-NEXT: punpkhi p2.h, p2.b -; CHECK-NEXT: punpklo p4.h, p3.b -; CHECK-NEXT: punpkhi p3.h, p3.b ; CHECK-NEXT: st1b { z4.d }, p4, [z16.d] -; CHECK-NEXT: st1b { z5.d }, p3, [z16.d] -; CHECK-NEXT: punpklo p3.h, p2.b -; CHECK-NEXT: punpkhi p2.h, p2.b -; CHECK-NEXT: st1b { z6.d }, p3, [z16.d] -; CHECK-NEXT: st1b { z7.d }, p2, [z16.d] +; CHECK-NEXT: st1b { z5.d }, p5, [z16.d] +; CHECK-NEXT: st1b { z6.d }, p6, 
[z16.d] +; CHECK-NEXT: st1b { z7.d }, p7, [z16.d] ; CHECK-NEXT: b .LBB0_1 br label %1 diff --git a/llvm/test/CodeGen/AArch64/fixed-point-conv-vec-pat.ll b/llvm/test/CodeGen/AArch64/fixed-point-conv-vec-pat.ll new file mode 100644 index 0000000000000..dff216192a6c3 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/fixed-point-conv-vec-pat.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s | FileCheck %s + +target triple = "aarch64" + +; First some corner cases +define <4 x float> @f_v4_s0(<4 x i32> %u) { +; CHECK-LABEL: f_v4_s0: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf v0.4s, v0.4s +; CHECK-NEXT: ret + %s = ashr exact <4 x i32> %u, + %v = sitofp <4 x i32> %s to <4 x float> + ret <4 x float> %v +} + +define <4 x float> @f_v4_s1(<4 x i32> %u) { +; CHECK-LABEL: f_v4_s1: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf v0.4s, v0.4s, #1 +; CHECK-NEXT: ret + %s = ashr exact <4 x i32> %u, + %v = sitofp <4 x i32> %s to <4 x float> + ret <4 x float> %v +} + +define <4 x float> @f_v4_s24_inexact(<4 x i32> %u) { +; CHECK-LABEL: f_v4_s24_inexact: +; CHECK: // %bb.0: +; CHECK-NEXT: sshr v0.4s, v0.4s, #24 +; CHECK-NEXT: scvtf v0.4s, v0.4s +; CHECK-NEXT: ret + %s = ashr <4 x i32> %u, + %v = sitofp <4 x i32> %s to <4 x float> + ret <4 x float> %v +} + +define <4 x float> @f_v4_s31(<4 x i32> %u) { +; CHECK-LABEL: f_v4_s31: +; CHECK: // %bb.0: +; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 +; CHECK-NEXT: scvtf v0.4s, v0.4s +; CHECK-NEXT: ret + %s = ashr <4 x i32> %u, + %v = sitofp <4 x i32> %s to <4 x float> + ret <4 x float> %v +} + +; Common cases for conversion from signed integer to floating point types +define <2 x float> @f_v2_s24(<2 x i32> %u) { +; CHECK-LABEL: f_v2_s24: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf v0.2s, v0.2s, #24 +; CHECK-NEXT: ret + %s = ashr exact <2 x i32> %u, + %v = sitofp <2 x i32> %s to <2 x float> + ret <2 x float> %v +} + +define <4 x float> @f_v4_s24(<4 x i32> %u) { +; CHECK-LABEL: 
f_v4_s24: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf v0.4s, v0.4s, #24 +; CHECK-NEXT: ret + %s = ashr exact <4 x i32> %u, + %v = sitofp <4 x i32> %s to <4 x float> + ret <4 x float> %v +} + +; Check legalisation to <2 x f64> does not get in the way +define <8 x double> @d_v8_s64(<8 x i64> %u) { +; CHECK-LABEL: d_v8_s64: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf v0.2d, v0.2d, #56 +; CHECK-NEXT: scvtf v1.2d, v1.2d, #56 +; CHECK-NEXT: scvtf v2.2d, v2.2d, #56 +; CHECK-NEXT: scvtf v3.2d, v3.2d, #56 +; CHECK-NEXT: ret + %s = ashr exact <8 x i64> %u, + %v = sitofp <8 x i64> %s to <8 x double> + ret <8 x double> %v +} + +define <4 x half> @h_v4_s8(<4 x i16> %u) #0 { +; CHECK-LABEL: h_v4_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf v0.4h, v0.4h, #8 +; CHECK-NEXT: ret + %s = ashr exact <4 x i16> %u, + %v = sitofp <4 x i16> %s to <4 x half> + ret <4 x half> %v +} + +define <8 x half> @h_v8_s8(<8 x i16> %u) #0 { +; CHECK-LABEL: h_v8_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf v0.8h, v0.8h, #8 +; CHECK-NEXT: ret + %s = ashr exact <8 x i16> %u, + %v = sitofp <8 x i16> %s to <8 x half> + ret <8 x half> %v +} + +attributes #0 = { "target-features"="+fullfp16"} diff --git a/llvm/test/CodeGen/AArch64/machine-sink-cache-invalidation.ll b/llvm/test/CodeGen/AArch64/machine-sink-cache-invalidation.ll index 6effc63ecc13c..fe3715341a25b 100644 --- a/llvm/test/CodeGen/AArch64/machine-sink-cache-invalidation.ll +++ b/llvm/test/CodeGen/AArch64/machine-sink-cache-invalidation.ll @@ -9,20 +9,20 @@ target triple = "arm64-apple-macosx13.5.0" define i32 @nsis_BZ2_bzDecompress(ptr %pos.i, i1 %cmp661.not3117.i, i1 %exitcond.not.i) { ; CHECK-LABEL: nsis_BZ2_bzDecompress: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: b .LBB0_2 ; CHECK-NEXT: .LBB0_1: // %while.end671.i ; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: strb w8, [x0] +; CHECK-NEXT: strb w9, [x0] ; CHECK-NEXT: tbnz w2, #0, .LBB0_4 ; CHECK-NEXT: .LBB0_2: // %for.body653.i ; CHECK-NEXT: // =>This Inner 
Loop Header: Depth=1 -; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: ldrb w9, [x0] ; CHECK-NEXT: tbnz w1, #0, .LBB0_1 ; CHECK-NEXT: // %bb.3: // %while.body663.i ; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: mov x9, xzr -; CHECK-NEXT: ldrb w9, [x9] -; CHECK-NEXT: strb wzr, [x0, x9] +; CHECK-NEXT: ldrb w10, [x8] +; CHECK-NEXT: strb wzr, [x0, x10] ; CHECK-NEXT: b .LBB0_1 ; CHECK-NEXT: .LBB0_4: // %for.end677.i ; CHECK-NEXT: mov w0, wzr diff --git a/llvm/test/CodeGen/AArch64/mulcmle.ll b/llvm/test/CodeGen/AArch64/mulcmle.ll index 5c216b8550080..32bc5c5e63b3e 100644 --- a/llvm/test/CodeGen/AArch64/mulcmle.ll +++ b/llvm/test/CodeGen/AArch64/mulcmle.ll @@ -1,11 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s +; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64 %s -o - -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <1 x i64> @v1i64(<1 x i64> %a) { -; CHECK-LABEL: v1i64: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v1i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmlt v0.2s, v0.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v1i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: lsr x8, x8, #31 +; CHECK-GI-NEXT: and x8, x8, #0x100000001 +; CHECK-GI-NEXT: lsl x9, x8, #32 +; CHECK-GI-NEXT: sub x8, x9, x8 +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %b = lshr <1 x i64> %a, %c = and <1 x i64> %b, %d = mul nuw <1 x i64> %c, diff --git a/llvm/test/CodeGen/AArch64/neon-extadd.ll b/llvm/test/CodeGen/AArch64/neon-extadd.ll index 16200435c5c31..402682c89124b 100644 --- a/llvm/test/CodeGen/AArch64/neon-extadd.ll +++ b/llvm/test/CodeGen/AArch64/neon-extadd.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple aarch64 -o - | 
FileCheck %s +; RUN: llc < %s -mtriple aarch64 -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple aarch64 -o - -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <8 x i16> @extadds_v8i8_i16(<8 x i8> %s0, <8 x i8> %s1) { ; CHECK-LABEL: extadds_v8i8_i16: @@ -26,12 +27,19 @@ entry: } define <16 x i16> @extadds_v16i8_i16(<16 x i8> %s0, <16 x i8> %s1) { -; CHECK-LABEL: extadds_v16i8_i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: saddl2 v2.8h, v0.16b, v1.16b -; CHECK-NEXT: saddl v0.8h, v0.8b, v1.8b -; CHECK-NEXT: mov v1.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extadds_v16i8_i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: saddl2 v2.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: saddl v0.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: mov v1.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extadds_v16i8_i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: saddl v2.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: saddl2 v1.8h, v0.16b, v1.16b +; CHECK-GI-NEXT: mov v0.16b, v2.16b +; CHECK-GI-NEXT: ret entry: %s0s = sext <16 x i8> %s0 to <16 x i16> %s1s = sext <16 x i8> %s1 to <16 x i16> @@ -40,12 +48,19 @@ entry: } define <16 x i16> @extaddu_v16i8_i16(<16 x i8> %s0, <16 x i8> %s1) { -; CHECK-LABEL: extaddu_v16i8_i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: uaddl2 v2.8h, v0.16b, v1.16b -; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b -; CHECK-NEXT: mov v1.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extaddu_v16i8_i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: uaddl2 v2.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: uaddl v0.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: mov v1.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extaddu_v16i8_i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: uaddl v2.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: uaddl2 v1.8h, v0.16b, v1.16b +; CHECK-GI-NEXT: mov v0.16b, v2.16b +; CHECK-GI-NEXT: ret entry: %s0s = zext <16 x i8> %s0 to <16 x i16> %s1s = zext <16 x i8> %s1 to <16 x i16> @@ -54,16 +69,26 @@ entry: } define 
<32 x i16> @extadds_v32i8_i16(<32 x i8> %s0, <32 x i8> %s1) { -; CHECK-LABEL: extadds_v32i8_i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: saddl2 v4.8h, v1.16b, v3.16b -; CHECK-NEXT: saddl v5.8h, v0.8b, v2.8b -; CHECK-NEXT: saddl2 v6.8h, v0.16b, v2.16b -; CHECK-NEXT: saddl v2.8h, v1.8b, v3.8b -; CHECK-NEXT: mov v0.16b, v5.16b -; CHECK-NEXT: mov v1.16b, v6.16b -; CHECK-NEXT: mov v3.16b, v4.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extadds_v32i8_i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: saddl2 v4.8h, v1.16b, v3.16b +; CHECK-SD-NEXT: saddl v5.8h, v0.8b, v2.8b +; CHECK-SD-NEXT: saddl2 v6.8h, v0.16b, v2.16b +; CHECK-SD-NEXT: saddl v2.8h, v1.8b, v3.8b +; CHECK-SD-NEXT: mov v0.16b, v5.16b +; CHECK-SD-NEXT: mov v1.16b, v6.16b +; CHECK-SD-NEXT: mov v3.16b, v4.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extadds_v32i8_i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: saddl v4.8h, v0.8b, v2.8b +; CHECK-GI-NEXT: saddl2 v5.8h, v0.16b, v2.16b +; CHECK-GI-NEXT: saddl v2.8h, v1.8b, v3.8b +; CHECK-GI-NEXT: saddl2 v3.8h, v1.16b, v3.16b +; CHECK-GI-NEXT: mov v0.16b, v4.16b +; CHECK-GI-NEXT: mov v1.16b, v5.16b +; CHECK-GI-NEXT: ret entry: %s0s = sext <32 x i8> %s0 to <32 x i16> %s1s = sext <32 x i8> %s1 to <32 x i16> @@ -72,16 +97,26 @@ entry: } define <32 x i16> @extaddu_v32i8_i16(<32 x i8> %s0, <32 x i8> %s1) { -; CHECK-LABEL: extaddu_v32i8_i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: uaddl2 v4.8h, v1.16b, v3.16b -; CHECK-NEXT: uaddl v5.8h, v0.8b, v2.8b -; CHECK-NEXT: uaddl2 v6.8h, v0.16b, v2.16b -; CHECK-NEXT: uaddl v2.8h, v1.8b, v3.8b -; CHECK-NEXT: mov v0.16b, v5.16b -; CHECK-NEXT: mov v1.16b, v6.16b -; CHECK-NEXT: mov v3.16b, v4.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extaddu_v32i8_i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: uaddl2 v4.8h, v1.16b, v3.16b +; CHECK-SD-NEXT: uaddl v5.8h, v0.8b, v2.8b +; CHECK-SD-NEXT: uaddl2 v6.8h, v0.16b, v2.16b +; CHECK-SD-NEXT: uaddl v2.8h, v1.8b, v3.8b +; CHECK-SD-NEXT: mov v0.16b, v5.16b +; 
CHECK-SD-NEXT: mov v1.16b, v6.16b +; CHECK-SD-NEXT: mov v3.16b, v4.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extaddu_v32i8_i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: uaddl v4.8h, v0.8b, v2.8b +; CHECK-GI-NEXT: uaddl2 v5.8h, v0.16b, v2.16b +; CHECK-GI-NEXT: uaddl v2.8h, v1.8b, v3.8b +; CHECK-GI-NEXT: uaddl2 v3.8h, v1.16b, v3.16b +; CHECK-GI-NEXT: mov v0.16b, v4.16b +; CHECK-GI-NEXT: mov v1.16b, v5.16b +; CHECK-GI-NEXT: ret entry: %s0s = zext <32 x i8> %s0 to <32 x i16> %s1s = zext <32 x i8> %s1 to <32 x i16> @@ -90,12 +125,19 @@ entry: } define <8 x i32> @extadds_v8i8_i32(<8 x i8> %s0, <8 x i8> %s1) { -; CHECK-LABEL: extadds_v8i8_i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: saddl v0.8h, v0.8b, v1.8b -; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0 -; CHECK-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extadds_v8i8_i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: saddl v0.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: sshll2 v1.4s, v0.8h, #0 +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extadds_v8i8_i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: saddl v1.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: sshll v0.4s, v1.4h, #0 +; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0 +; CHECK-GI-NEXT: ret entry: %s0s = sext <8 x i8> %s0 to <8 x i32> %s1s = sext <8 x i8> %s1 to <8 x i32> @@ -104,12 +146,19 @@ entry: } define <8 x i32> @extaddu_v8i8_i32(<8 x i8> %s0, <8 x i8> %s1) { -; CHECK-LABEL: extaddu_v8i8_i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b -; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extaddu_v8i8_i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: uaddl v0.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: ushll2 v1.4s, v0.8h, #0 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extaddu_v8i8_i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: uaddl v1.8h, v0.8b, v1.8b +; 
CHECK-GI-NEXT: ushll v0.4s, v1.4h, #0 +; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0 +; CHECK-GI-NEXT: ret entry: %s0s = zext <8 x i8> %s0 to <8 x i32> %s1s = zext <8 x i8> %s1 to <8 x i32> @@ -117,16 +166,68 @@ entry: ret <8 x i32> %m } +define <8 x i32> @extsubs_v8i8_i32(<8 x i8> %s0, <8 x i8> %s1) { +; CHECK-SD-LABEL: extsubs_v8i8_i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ssubl v0.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: sshll2 v1.4s, v0.8h, #0 +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extsubs_v8i8_i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ssubl v1.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: sshll v0.4s, v1.4h, #0 +; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0 +; CHECK-GI-NEXT: ret +entry: + %s0s = sext <8 x i8> %s0 to <8 x i32> + %s1s = sext <8 x i8> %s1 to <8 x i32> + %m = sub <8 x i32> %s0s, %s1s + ret <8 x i32> %m +} + +define <8 x i32> @extsubu_v8i8_i32(<8 x i8> %s0, <8 x i8> %s1) { +; CHECK-SD-LABEL: extsubu_v8i8_i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: usubl v0.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: sshll2 v1.4s, v0.8h, #0 +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extsubu_v8i8_i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: usubl v1.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: sshll v0.4s, v1.4h, #0 +; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0 +; CHECK-GI-NEXT: ret +entry: + %s0s = zext <8 x i8> %s0 to <8 x i32> + %s1s = zext <8 x i8> %s1 to <8 x i32> + %m = sub <8 x i32> %s0s, %s1s + ret <8 x i32> %m +} + define <16 x i32> @extadds_v16i8_i32(<16 x i8> %s0, <16 x i8> %s1) { -; CHECK-LABEL: extadds_v16i8_i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: saddl v2.8h, v0.8b, v1.8b -; CHECK-NEXT: saddl2 v4.8h, v0.16b, v1.16b -; CHECK-NEXT: sshll v0.4s, v2.4h, #0 -; CHECK-NEXT: sshll2 v3.4s, v4.8h, #0 -; CHECK-NEXT: sshll2 v1.4s, v2.8h, #0 -; CHECK-NEXT: sshll v2.4s, v4.4h, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extadds_v16i8_i32: +; CHECK-SD: // %bb.0: // %entry 
+; CHECK-SD-NEXT: saddl v2.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: saddl2 v4.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: sshll v0.4s, v2.4h, #0 +; CHECK-SD-NEXT: sshll2 v3.4s, v4.8h, #0 +; CHECK-SD-NEXT: sshll2 v1.4s, v2.8h, #0 +; CHECK-SD-NEXT: sshll v2.4s, v4.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extadds_v16i8_i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: saddl v2.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: saddl2 v3.8h, v0.16b, v1.16b +; CHECK-GI-NEXT: sshll v0.4s, v2.4h, #0 +; CHECK-GI-NEXT: sshll2 v1.4s, v2.8h, #0 +; CHECK-GI-NEXT: sshll v2.4s, v3.4h, #0 +; CHECK-GI-NEXT: sshll2 v3.4s, v3.8h, #0 +; CHECK-GI-NEXT: ret entry: %s0s = sext <16 x i8> %s0 to <16 x i32> %s1s = sext <16 x i8> %s1 to <16 x i32> @@ -135,15 +236,25 @@ entry: } define <16 x i32> @extaddu_v16i8_i32(<16 x i8> %s0, <16 x i8> %s1) { -; CHECK-LABEL: extaddu_v16i8_i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: uaddl v2.8h, v0.8b, v1.8b -; CHECK-NEXT: uaddl2 v4.8h, v0.16b, v1.16b -; CHECK-NEXT: ushll v0.4s, v2.4h, #0 -; CHECK-NEXT: ushll2 v3.4s, v4.8h, #0 -; CHECK-NEXT: ushll2 v1.4s, v2.8h, #0 -; CHECK-NEXT: ushll v2.4s, v4.4h, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extaddu_v16i8_i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: uaddl v2.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: uaddl2 v4.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: ushll v0.4s, v2.4h, #0 +; CHECK-SD-NEXT: ushll2 v3.4s, v4.8h, #0 +; CHECK-SD-NEXT: ushll2 v1.4s, v2.8h, #0 +; CHECK-SD-NEXT: ushll v2.4s, v4.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extaddu_v16i8_i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: uaddl v2.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: uaddl2 v3.8h, v0.16b, v1.16b +; CHECK-GI-NEXT: ushll v0.4s, v2.4h, #0 +; CHECK-GI-NEXT: ushll2 v1.4s, v2.8h, #0 +; CHECK-GI-NEXT: ushll v2.4s, v3.4h, #0 +; CHECK-GI-NEXT: ushll2 v3.4s, v3.8h, #0 +; CHECK-GI-NEXT: ret entry: %s0s = zext <16 x i8> %s0 to <16 x i32> %s1s = zext <16 x i8> %s1 to <16 x i32> @@ -151,17 +262,82 @@ entry: ret <16 x i32> %m } +define <16 x i32> 
@extsubs_v16i8_i32(<16 x i8> %s0, <16 x i8> %s1) { +; CHECK-SD-LABEL: extsubs_v16i8_i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ssubl v2.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: ssubl2 v4.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: sshll v0.4s, v2.4h, #0 +; CHECK-SD-NEXT: sshll2 v3.4s, v4.8h, #0 +; CHECK-SD-NEXT: sshll2 v1.4s, v2.8h, #0 +; CHECK-SD-NEXT: sshll v2.4s, v4.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extsubs_v16i8_i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ssubl v2.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: ssubl2 v3.8h, v0.16b, v1.16b +; CHECK-GI-NEXT: sshll v0.4s, v2.4h, #0 +; CHECK-GI-NEXT: sshll2 v1.4s, v2.8h, #0 +; CHECK-GI-NEXT: sshll v2.4s, v3.4h, #0 +; CHECK-GI-NEXT: sshll2 v3.4s, v3.8h, #0 +; CHECK-GI-NEXT: ret +entry: + %s0s = sext <16 x i8> %s0 to <16 x i32> + %s1s = sext <16 x i8> %s1 to <16 x i32> + %m = sub <16 x i32> %s0s, %s1s + ret <16 x i32> %m +} + +define <16 x i32> @extsubu_v16i8_i32(<16 x i8> %s0, <16 x i8> %s1) { +; CHECK-SD-LABEL: extsubu_v16i8_i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: usubl v2.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: usubl2 v4.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: sshll v0.4s, v2.4h, #0 +; CHECK-SD-NEXT: sshll2 v3.4s, v4.8h, #0 +; CHECK-SD-NEXT: sshll2 v1.4s, v2.8h, #0 +; CHECK-SD-NEXT: sshll v2.4s, v4.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extsubu_v16i8_i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: usubl v2.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: usubl2 v3.8h, v0.16b, v1.16b +; CHECK-GI-NEXT: sshll v0.4s, v2.4h, #0 +; CHECK-GI-NEXT: sshll2 v1.4s, v2.8h, #0 +; CHECK-GI-NEXT: sshll v2.4s, v3.4h, #0 +; CHECK-GI-NEXT: sshll2 v3.4s, v3.8h, #0 +; CHECK-GI-NEXT: ret +entry: + %s0s = zext <16 x i8> %s0 to <16 x i32> + %s1s = zext <16 x i8> %s1 to <16 x i32> + %m = sub <16 x i32> %s0s, %s1s + ret <16 x i32> %m +} + define <8 x i64> @extadds_v8i8_i64(<8 x i8> %s0, <8 x i8> %s1) { -; CHECK-LABEL: extadds_v8i8_i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: saddl v0.8h, v0.8b, v1.8b -; 
CHECK-NEXT: sshll v1.4s, v0.4h, #0 -; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0 -; CHECK-NEXT: sshll v0.2d, v1.2s, #0 -; CHECK-NEXT: sshll2 v3.2d, v2.4s, #0 -; CHECK-NEXT: sshll2 v1.2d, v1.4s, #0 -; CHECK-NEXT: sshll v2.2d, v2.2s, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extadds_v8i8_i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: saddl v0.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-SD-NEXT: sshll2 v2.4s, v0.8h, #0 +; CHECK-SD-NEXT: sshll v0.2d, v1.2s, #0 +; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0 +; CHECK-SD-NEXT: sshll2 v1.2d, v1.4s, #0 +; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extadds_v8i8_i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: saddl v0.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll2 v3.4s, v0.8h, #0 +; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0 +; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0 +; CHECK-GI-NEXT: ret entry: %s0s = sext <8 x i8> %s0 to <8 x i64> %s1s = sext <8 x i8> %s1 to <8 x i64> @@ -170,16 +346,27 @@ entry: } define <8 x i64> @extaddu_v8i8_i64(<8 x i8> %s0, <8 x i8> %s1) { -; CHECK-LABEL: extaddu_v8i8_i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b -; CHECK-NEXT: ushll v1.4s, v0.4h, #0 -; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0 -; CHECK-NEXT: ushll v0.2d, v1.2s, #0 -; CHECK-NEXT: ushll2 v3.2d, v2.4s, #0 -; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0 -; CHECK-NEXT: ushll v2.2d, v2.2s, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extaddu_v8i8_i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: uaddl v0.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll2 v2.4s, v0.8h, #0 +; CHECK-SD-NEXT: ushll v0.2d, v1.2s, #0 +; CHECK-SD-NEXT: ushll2 v3.2d, v2.4s, #0 +; CHECK-SD-NEXT: ushll2 v1.2d, v1.4s, #0 +; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extaddu_v8i8_i64: +; CHECK-GI: // 
%bb.0: // %entry +; CHECK-GI-NEXT: uaddl v0.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll2 v3.4s, v0.8h, #0 +; CHECK-GI-NEXT: ushll v0.2d, v1.2s, #0 +; CHECK-GI-NEXT: ushll2 v1.2d, v1.4s, #0 +; CHECK-GI-NEXT: ushll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: ushll2 v3.2d, v3.4s, #0 +; CHECK-GI-NEXT: ret entry: %s0s = zext <8 x i8> %s0 to <8 x i64> %s1s = zext <8 x i8> %s1 to <8 x i64> @@ -187,6 +374,384 @@ entry: ret <8 x i64> %m } +define <8 x i64> @extsubs_v8i8_i64(<8 x i8> %s0, <8 x i8> %s1) { +; CHECK-SD-LABEL: extsubs_v8i8_i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ssubl v0.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-SD-NEXT: sshll2 v2.4s, v0.8h, #0 +; CHECK-SD-NEXT: sshll v0.2d, v1.2s, #0 +; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0 +; CHECK-SD-NEXT: sshll2 v1.2d, v1.4s, #0 +; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extsubs_v8i8_i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ssubl v0.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll2 v3.4s, v0.8h, #0 +; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0 +; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0 +; CHECK-GI-NEXT: ret +entry: + %s0s = sext <8 x i8> %s0 to <8 x i64> + %s1s = sext <8 x i8> %s1 to <8 x i64> + %m = sub <8 x i64> %s0s, %s1s + ret <8 x i64> %m +} + +define <8 x i64> @extsubu_v8i8_i64(<8 x i8> %s0, <8 x i8> %s1) { +; CHECK-SD-LABEL: extsubu_v8i8_i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: usubl v0.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-SD-NEXT: sshll2 v2.4s, v0.8h, #0 +; CHECK-SD-NEXT: sshll v0.2d, v1.2s, #0 +; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0 +; CHECK-SD-NEXT: sshll2 v1.2d, v1.4s, #0 +; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extsubu_v8i8_i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: usubl 
v0.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll2 v3.4s, v0.8h, #0 +; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0 +; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0 +; CHECK-GI-NEXT: ret +entry: + %s0s = zext <8 x i8> %s0 to <8 x i64> + %s1s = zext <8 x i8> %s1 to <8 x i64> + %m = sub <8 x i64> %s0s, %s1s + ret <8 x i64> %m +} + +define <16 x i64> @extaddu_v16i8_i64(<16 x i8> %a, <16 x i8> %b) { +; CHECK-SD-LABEL: extaddu_v16i8_i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uaddl v2.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: uaddl2 v0.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: ushll v3.4s, v2.4h, #0 +; CHECK-SD-NEXT: ushll2 v2.4s, v2.8h, #0 +; CHECK-SD-NEXT: ushll v5.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll2 v6.4s, v0.8h, #0 +; CHECK-SD-NEXT: ushll2 v1.2d, v3.4s, #0 +; CHECK-SD-NEXT: ushll v0.2d, v3.2s, #0 +; CHECK-SD-NEXT: ushll2 v3.2d, v2.4s, #0 +; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: ushll v4.2d, v5.2s, #0 +; CHECK-SD-NEXT: ushll2 v7.2d, v6.4s, #0 +; CHECK-SD-NEXT: ushll2 v5.2d, v5.4s, #0 +; CHECK-SD-NEXT: ushll v6.2d, v6.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extaddu_v16i8_i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uaddl v2.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: uaddl2 v0.8h, v0.16b, v1.16b +; CHECK-GI-NEXT: ushll v1.4s, v2.4h, #0 +; CHECK-GI-NEXT: ushll2 v3.4s, v2.8h, #0 +; CHECK-GI-NEXT: ushll v5.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll2 v7.4s, v0.8h, #0 +; CHECK-GI-NEXT: ushll v0.2d, v1.2s, #0 +; CHECK-GI-NEXT: ushll2 v1.2d, v1.4s, #0 +; CHECK-GI-NEXT: ushll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: ushll2 v3.2d, v3.4s, #0 +; CHECK-GI-NEXT: ushll v4.2d, v5.2s, #0 +; CHECK-GI-NEXT: ushll2 v5.2d, v5.4s, #0 +; CHECK-GI-NEXT: ushll v6.2d, v7.2s, #0 +; CHECK-GI-NEXT: ushll2 v7.2d, v7.4s, #0 +; CHECK-GI-NEXT: ret + %c = zext <16 x i8> %a to <16 x i64> + %d = zext <16 x i8> %b to <16 x i64> + %e = add <16 x i64> %c, %d + ret <16 x i64> %e +} + +define <16 x 
i64> @extadds_v16i8_i64(<16 x i8> %a, <16 x i8> %b) { +; CHECK-SD-LABEL: extadds_v16i8_i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: saddl v2.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: saddl2 v0.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: sshll v3.4s, v2.4h, #0 +; CHECK-SD-NEXT: sshll2 v2.4s, v2.8h, #0 +; CHECK-SD-NEXT: sshll v5.4s, v0.4h, #0 +; CHECK-SD-NEXT: sshll2 v6.4s, v0.8h, #0 +; CHECK-SD-NEXT: sshll2 v1.2d, v3.4s, #0 +; CHECK-SD-NEXT: sshll v0.2d, v3.2s, #0 +; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0 +; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: sshll v4.2d, v5.2s, #0 +; CHECK-SD-NEXT: sshll2 v7.2d, v6.4s, #0 +; CHECK-SD-NEXT: sshll2 v5.2d, v5.4s, #0 +; CHECK-SD-NEXT: sshll v6.2d, v6.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extadds_v16i8_i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: saddl v2.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: saddl2 v0.8h, v0.16b, v1.16b +; CHECK-GI-NEXT: sshll v1.4s, v2.4h, #0 +; CHECK-GI-NEXT: sshll2 v3.4s, v2.8h, #0 +; CHECK-GI-NEXT: sshll v5.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll2 v7.4s, v0.8h, #0 +; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0 +; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0 +; CHECK-GI-NEXT: sshll v4.2d, v5.2s, #0 +; CHECK-GI-NEXT: sshll2 v5.2d, v5.4s, #0 +; CHECK-GI-NEXT: sshll v6.2d, v7.2s, #0 +; CHECK-GI-NEXT: sshll2 v7.2d, v7.4s, #0 +; CHECK-GI-NEXT: ret + %c = sext <16 x i8> %a to <16 x i64> + %d = sext <16 x i8> %b to <16 x i64> + %e = add <16 x i64> %c, %d + ret <16 x i64> %e +} + +define <16 x i64> @extsubu_v16i8_i64(<16 x i8> %a, <16 x i8> %b) { +; CHECK-SD-LABEL: extsubu_v16i8_i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: usubl v2.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: usubl2 v0.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: sshll v3.4s, v2.4h, #0 +; CHECK-SD-NEXT: sshll2 v2.4s, v2.8h, #0 +; CHECK-SD-NEXT: sshll v5.4s, v0.4h, #0 +; CHECK-SD-NEXT: sshll2 v6.4s, v0.8h, #0 +; CHECK-SD-NEXT: sshll2 v1.2d, v3.4s, #0 +; CHECK-SD-NEXT: sshll v0.2d, v3.2s, 
#0 +; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0 +; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: sshll v4.2d, v5.2s, #0 +; CHECK-SD-NEXT: sshll2 v7.2d, v6.4s, #0 +; CHECK-SD-NEXT: sshll2 v5.2d, v5.4s, #0 +; CHECK-SD-NEXT: sshll v6.2d, v6.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extsubu_v16i8_i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: usubl v2.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: usubl2 v0.8h, v0.16b, v1.16b +; CHECK-GI-NEXT: sshll v1.4s, v2.4h, #0 +; CHECK-GI-NEXT: sshll2 v3.4s, v2.8h, #0 +; CHECK-GI-NEXT: sshll v5.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll2 v7.4s, v0.8h, #0 +; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0 +; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0 +; CHECK-GI-NEXT: sshll v4.2d, v5.2s, #0 +; CHECK-GI-NEXT: sshll2 v5.2d, v5.4s, #0 +; CHECK-GI-NEXT: sshll v6.2d, v7.2s, #0 +; CHECK-GI-NEXT: sshll2 v7.2d, v7.4s, #0 +; CHECK-GI-NEXT: ret + %c = zext <16 x i8> %a to <16 x i64> + %d = zext <16 x i8> %b to <16 x i64> + %e = sub <16 x i64> %c, %d + ret <16 x i64> %e +} + +define <16 x i64> @extsubs_v16i8_i64(<16 x i8> %a, <16 x i8> %b) { +; CHECK-SD-LABEL: extsubs_v16i8_i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ssubl v2.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: ssubl2 v0.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: sshll v3.4s, v2.4h, #0 +; CHECK-SD-NEXT: sshll2 v2.4s, v2.8h, #0 +; CHECK-SD-NEXT: sshll v5.4s, v0.4h, #0 +; CHECK-SD-NEXT: sshll2 v6.4s, v0.8h, #0 +; CHECK-SD-NEXT: sshll2 v1.2d, v3.4s, #0 +; CHECK-SD-NEXT: sshll v0.2d, v3.2s, #0 +; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0 +; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: sshll v4.2d, v5.2s, #0 +; CHECK-SD-NEXT: sshll2 v7.2d, v6.4s, #0 +; CHECK-SD-NEXT: sshll2 v5.2d, v5.4s, #0 +; CHECK-SD-NEXT: sshll v6.2d, v6.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extsubs_v16i8_i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ssubl v2.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: ssubl2 v0.8h, v0.16b, v1.16b +; CHECK-GI-NEXT: sshll 
v1.4s, v2.4h, #0 +; CHECK-GI-NEXT: sshll2 v3.4s, v2.8h, #0 +; CHECK-GI-NEXT: sshll v5.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll2 v7.4s, v0.8h, #0 +; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0 +; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0 +; CHECK-GI-NEXT: sshll v4.2d, v5.2s, #0 +; CHECK-GI-NEXT: sshll2 v5.2d, v5.4s, #0 +; CHECK-GI-NEXT: sshll v6.2d, v7.2s, #0 +; CHECK-GI-NEXT: sshll2 v7.2d, v7.4s, #0 +; CHECK-GI-NEXT: ret + %c = sext <16 x i8> %a to <16 x i64> + %d = sext <16 x i8> %b to <16 x i64> + %e = sub <16 x i64> %c, %d + ret <16 x i64> %e +} + +define <16 x i64> @extaddu_v16i16_i64(<16 x i16> %a, <16 x i16> %b) { +; CHECK-SD-LABEL: extaddu_v16i16_i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uaddl v5.4s, v1.4h, v3.4h +; CHECK-SD-NEXT: uaddl v4.4s, v0.4h, v2.4h +; CHECK-SD-NEXT: uaddl2 v2.4s, v0.8h, v2.8h +; CHECK-SD-NEXT: uaddl2 v6.4s, v1.8h, v3.8h +; CHECK-SD-NEXT: ushll2 v1.2d, v4.4s, #0 +; CHECK-SD-NEXT: ushll v0.2d, v4.2s, #0 +; CHECK-SD-NEXT: ushll2 v3.2d, v2.4s, #0 +; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: ushll v4.2d, v5.2s, #0 +; CHECK-SD-NEXT: ushll2 v7.2d, v6.4s, #0 +; CHECK-SD-NEXT: ushll2 v5.2d, v5.4s, #0 +; CHECK-SD-NEXT: ushll v6.2d, v6.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extaddu_v16i16_i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uaddl v4.4s, v0.4h, v2.4h +; CHECK-GI-NEXT: uaddl2 v5.4s, v0.8h, v2.8h +; CHECK-GI-NEXT: uaddl v6.4s, v1.4h, v3.4h +; CHECK-GI-NEXT: uaddl2 v7.4s, v1.8h, v3.8h +; CHECK-GI-NEXT: ushll v0.2d, v4.2s, #0 +; CHECK-GI-NEXT: ushll2 v1.2d, v4.4s, #0 +; CHECK-GI-NEXT: ushll v2.2d, v5.2s, #0 +; CHECK-GI-NEXT: ushll2 v3.2d, v5.4s, #0 +; CHECK-GI-NEXT: ushll v4.2d, v6.2s, #0 +; CHECK-GI-NEXT: ushll2 v5.2d, v6.4s, #0 +; CHECK-GI-NEXT: ushll v6.2d, v7.2s, #0 +; CHECK-GI-NEXT: ushll2 v7.2d, v7.4s, #0 +; CHECK-GI-NEXT: ret + %c = zext <16 x i16> %a to <16 x i64> + %d = zext <16 x i16> %b to <16 x i64> + %e = add <16 x i64> %c, %d 
+ ret <16 x i64> %e +} + +define <16 x i64> @extadds_v16i16_i64(<16 x i16> %a, <16 x i16> %b) { +; CHECK-SD-LABEL: extadds_v16i16_i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: saddl v5.4s, v1.4h, v3.4h +; CHECK-SD-NEXT: saddl v4.4s, v0.4h, v2.4h +; CHECK-SD-NEXT: saddl2 v2.4s, v0.8h, v2.8h +; CHECK-SD-NEXT: saddl2 v6.4s, v1.8h, v3.8h +; CHECK-SD-NEXT: sshll2 v1.2d, v4.4s, #0 +; CHECK-SD-NEXT: sshll v0.2d, v4.2s, #0 +; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0 +; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: sshll v4.2d, v5.2s, #0 +; CHECK-SD-NEXT: sshll2 v7.2d, v6.4s, #0 +; CHECK-SD-NEXT: sshll2 v5.2d, v5.4s, #0 +; CHECK-SD-NEXT: sshll v6.2d, v6.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extadds_v16i16_i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: saddl v4.4s, v0.4h, v2.4h +; CHECK-GI-NEXT: saddl2 v5.4s, v0.8h, v2.8h +; CHECK-GI-NEXT: saddl v6.4s, v1.4h, v3.4h +; CHECK-GI-NEXT: saddl2 v7.4s, v1.8h, v3.8h +; CHECK-GI-NEXT: sshll v0.2d, v4.2s, #0 +; CHECK-GI-NEXT: sshll2 v1.2d, v4.4s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v5.2s, #0 +; CHECK-GI-NEXT: sshll2 v3.2d, v5.4s, #0 +; CHECK-GI-NEXT: sshll v4.2d, v6.2s, #0 +; CHECK-GI-NEXT: sshll2 v5.2d, v6.4s, #0 +; CHECK-GI-NEXT: sshll v6.2d, v7.2s, #0 +; CHECK-GI-NEXT: sshll2 v7.2d, v7.4s, #0 +; CHECK-GI-NEXT: ret + %c = sext <16 x i16> %a to <16 x i64> + %d = sext <16 x i16> %b to <16 x i64> + %e = add <16 x i64> %c, %d + ret <16 x i64> %e +} + +define <16 x i64> @extsubu_v16i16_i64(<16 x i16> %a, <16 x i16> %b) { +; CHECK-SD-LABEL: extsubu_v16i16_i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: usubl v5.4s, v1.4h, v3.4h +; CHECK-SD-NEXT: usubl v4.4s, v0.4h, v2.4h +; CHECK-SD-NEXT: usubl2 v2.4s, v0.8h, v2.8h +; CHECK-SD-NEXT: usubl2 v6.4s, v1.8h, v3.8h +; CHECK-SD-NEXT: sshll2 v1.2d, v4.4s, #0 +; CHECK-SD-NEXT: sshll v0.2d, v4.2s, #0 +; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0 +; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: sshll v4.2d, v5.2s, #0 +; CHECK-SD-NEXT: sshll2 v7.2d, v6.4s, #0 +; CHECK-SD-NEXT: 
sshll2 v5.2d, v5.4s, #0 +; CHECK-SD-NEXT: sshll v6.2d, v6.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extsubu_v16i16_i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: usubl v4.4s, v0.4h, v2.4h +; CHECK-GI-NEXT: usubl2 v5.4s, v0.8h, v2.8h +; CHECK-GI-NEXT: usubl v6.4s, v1.4h, v3.4h +; CHECK-GI-NEXT: usubl2 v7.4s, v1.8h, v3.8h +; CHECK-GI-NEXT: sshll v0.2d, v4.2s, #0 +; CHECK-GI-NEXT: sshll2 v1.2d, v4.4s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v5.2s, #0 +; CHECK-GI-NEXT: sshll2 v3.2d, v5.4s, #0 +; CHECK-GI-NEXT: sshll v4.2d, v6.2s, #0 +; CHECK-GI-NEXT: sshll2 v5.2d, v6.4s, #0 +; CHECK-GI-NEXT: sshll v6.2d, v7.2s, #0 +; CHECK-GI-NEXT: sshll2 v7.2d, v7.4s, #0 +; CHECK-GI-NEXT: ret + %c = zext <16 x i16> %a to <16 x i64> + %d = zext <16 x i16> %b to <16 x i64> + %e = sub <16 x i64> %c, %d + ret <16 x i64> %e +} + +define <16 x i64> @extsubs_v16i16_i64(<16 x i16> %a, <16 x i16> %b) { +; CHECK-SD-LABEL: extsubs_v16i16_i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ssubl v5.4s, v1.4h, v3.4h +; CHECK-SD-NEXT: ssubl v4.4s, v0.4h, v2.4h +; CHECK-SD-NEXT: ssubl2 v2.4s, v0.8h, v2.8h +; CHECK-SD-NEXT: ssubl2 v6.4s, v1.8h, v3.8h +; CHECK-SD-NEXT: sshll2 v1.2d, v4.4s, #0 +; CHECK-SD-NEXT: sshll v0.2d, v4.2s, #0 +; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0 +; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: sshll v4.2d, v5.2s, #0 +; CHECK-SD-NEXT: sshll2 v7.2d, v6.4s, #0 +; CHECK-SD-NEXT: sshll2 v5.2d, v5.4s, #0 +; CHECK-SD-NEXT: sshll v6.2d, v6.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extsubs_v16i16_i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ssubl v4.4s, v0.4h, v2.4h +; CHECK-GI-NEXT: ssubl2 v5.4s, v0.8h, v2.8h +; CHECK-GI-NEXT: ssubl v6.4s, v1.4h, v3.4h +; CHECK-GI-NEXT: ssubl2 v7.4s, v1.8h, v3.8h +; CHECK-GI-NEXT: sshll v0.2d, v4.2s, #0 +; CHECK-GI-NEXT: sshll2 v1.2d, v4.4s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v5.2s, #0 +; CHECK-GI-NEXT: sshll2 v3.2d, v5.4s, #0 +; CHECK-GI-NEXT: sshll v4.2d, v6.2s, #0 +; CHECK-GI-NEXT: sshll2 v5.2d, v6.4s, #0 +; CHECK-GI-NEXT: sshll 
v6.2d, v7.2s, #0 +; CHECK-GI-NEXT: sshll2 v7.2d, v7.4s, #0 +; CHECK-GI-NEXT: ret + %c = sext <16 x i16> %a to <16 x i64> + %d = sext <16 x i16> %b to <16 x i64> + %e = sub <16 x i64> %c, %d + ret <16 x i64> %e +} + define <4 x i32> @extadds_v4i16_i32(<4 x i16> %s0, <4 x i16> %s1) { ; CHECK-LABEL: extadds_v4i16_i32: ; CHECK: // %bb.0: // %entry @@ -212,12 +777,19 @@ entry: } define <8 x i32> @extadds_v8i16_i32(<8 x i16> %s0, <8 x i16> %s1) { -; CHECK-LABEL: extadds_v8i16_i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: saddl2 v2.4s, v0.8h, v1.8h -; CHECK-NEXT: saddl v0.4s, v0.4h, v1.4h -; CHECK-NEXT: mov v1.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extadds_v8i16_i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: saddl2 v2.4s, v0.8h, v1.8h +; CHECK-SD-NEXT: saddl v0.4s, v0.4h, v1.4h +; CHECK-SD-NEXT: mov v1.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extadds_v8i16_i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: saddl v2.4s, v0.4h, v1.4h +; CHECK-GI-NEXT: saddl2 v1.4s, v0.8h, v1.8h +; CHECK-GI-NEXT: mov v0.16b, v2.16b +; CHECK-GI-NEXT: ret entry: %s0s = sext <8 x i16> %s0 to <8 x i32> %s1s = sext <8 x i16> %s1 to <8 x i32> @@ -226,12 +798,19 @@ entry: } define <8 x i32> @extaddu_v8i16_i32(<8 x i16> %s0, <8 x i16> %s1) { -; CHECK-LABEL: extaddu_v8i16_i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: uaddl2 v2.4s, v0.8h, v1.8h -; CHECK-NEXT: uaddl v0.4s, v0.4h, v1.4h -; CHECK-NEXT: mov v1.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extaddu_v8i16_i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: uaddl2 v2.4s, v0.8h, v1.8h +; CHECK-SD-NEXT: uaddl v0.4s, v0.4h, v1.4h +; CHECK-SD-NEXT: mov v1.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extaddu_v8i16_i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: uaddl v2.4s, v0.4h, v1.4h +; CHECK-GI-NEXT: uaddl2 v1.4s, v0.8h, v1.8h +; CHECK-GI-NEXT: mov v0.16b, v2.16b +; CHECK-GI-NEXT: ret entry: %s0s = zext <8 x i16> %s0 to <8 x i32> %s1s = zext <8 x i16> %s1 to <8 x i32> 
@@ -240,16 +819,26 @@ entry: } define <16 x i32> @extadds_v16i16_i32(<16 x i16> %s0, <16 x i16> %s1) { -; CHECK-LABEL: extadds_v16i16_i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: saddl2 v4.4s, v1.8h, v3.8h -; CHECK-NEXT: saddl v5.4s, v0.4h, v2.4h -; CHECK-NEXT: saddl2 v6.4s, v0.8h, v2.8h -; CHECK-NEXT: saddl v2.4s, v1.4h, v3.4h -; CHECK-NEXT: mov v0.16b, v5.16b -; CHECK-NEXT: mov v1.16b, v6.16b -; CHECK-NEXT: mov v3.16b, v4.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extadds_v16i16_i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: saddl2 v4.4s, v1.8h, v3.8h +; CHECK-SD-NEXT: saddl v5.4s, v0.4h, v2.4h +; CHECK-SD-NEXT: saddl2 v6.4s, v0.8h, v2.8h +; CHECK-SD-NEXT: saddl v2.4s, v1.4h, v3.4h +; CHECK-SD-NEXT: mov v0.16b, v5.16b +; CHECK-SD-NEXT: mov v1.16b, v6.16b +; CHECK-SD-NEXT: mov v3.16b, v4.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extadds_v16i16_i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: saddl v4.4s, v0.4h, v2.4h +; CHECK-GI-NEXT: saddl2 v5.4s, v0.8h, v2.8h +; CHECK-GI-NEXT: saddl v2.4s, v1.4h, v3.4h +; CHECK-GI-NEXT: saddl2 v3.4s, v1.8h, v3.8h +; CHECK-GI-NEXT: mov v0.16b, v4.16b +; CHECK-GI-NEXT: mov v1.16b, v5.16b +; CHECK-GI-NEXT: ret entry: %s0s = sext <16 x i16> %s0 to <16 x i32> %s1s = sext <16 x i16> %s1 to <16 x i32> @@ -258,16 +847,26 @@ entry: } define <16 x i32> @extaddu_v16i16_i32(<16 x i16> %s0, <16 x i16> %s1) { -; CHECK-LABEL: extaddu_v16i16_i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: uaddl2 v4.4s, v1.8h, v3.8h -; CHECK-NEXT: uaddl v5.4s, v0.4h, v2.4h -; CHECK-NEXT: uaddl2 v6.4s, v0.8h, v2.8h -; CHECK-NEXT: uaddl v2.4s, v1.4h, v3.4h -; CHECK-NEXT: mov v0.16b, v5.16b -; CHECK-NEXT: mov v1.16b, v6.16b -; CHECK-NEXT: mov v3.16b, v4.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extaddu_v16i16_i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: uaddl2 v4.4s, v1.8h, v3.8h +; CHECK-SD-NEXT: uaddl v5.4s, v0.4h, v2.4h +; CHECK-SD-NEXT: uaddl2 v6.4s, v0.8h, v2.8h +; CHECK-SD-NEXT: uaddl v2.4s, v1.4h, v3.4h +; 
CHECK-SD-NEXT: mov v0.16b, v5.16b +; CHECK-SD-NEXT: mov v1.16b, v6.16b +; CHECK-SD-NEXT: mov v3.16b, v4.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extaddu_v16i16_i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: uaddl v4.4s, v0.4h, v2.4h +; CHECK-GI-NEXT: uaddl2 v5.4s, v0.8h, v2.8h +; CHECK-GI-NEXT: uaddl v2.4s, v1.4h, v3.4h +; CHECK-GI-NEXT: uaddl2 v3.4s, v1.8h, v3.8h +; CHECK-GI-NEXT: mov v0.16b, v4.16b +; CHECK-GI-NEXT: mov v1.16b, v5.16b +; CHECK-GI-NEXT: ret entry: %s0s = zext <16 x i16> %s0 to <16 x i32> %s1s = zext <16 x i16> %s1 to <16 x i32> @@ -276,12 +875,19 @@ entry: } define <4 x i64> @extadds_v4i16_i64(<4 x i16> %s0, <4 x i16> %s1) { -; CHECK-LABEL: extadds_v4i16_i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: saddl v0.4s, v0.4h, v1.4h -; CHECK-NEXT: sshll2 v1.2d, v0.4s, #0 -; CHECK-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extadds_v4i16_i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: saddl v0.4s, v0.4h, v1.4h +; CHECK-SD-NEXT: sshll2 v1.2d, v0.4s, #0 +; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extadds_v4i16_i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: saddl v1.4s, v0.4h, v1.4h +; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0 +; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0 +; CHECK-GI-NEXT: ret entry: %s0s = sext <4 x i16> %s0 to <4 x i64> %s1s = sext <4 x i16> %s1 to <4 x i64> @@ -290,12 +896,19 @@ entry: } define <4 x i64> @extaddu_v4i16_i64(<4 x i16> %s0, <4 x i16> %s1) { -; CHECK-LABEL: extaddu_v4i16_i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: uaddl v0.4s, v0.4h, v1.4h -; CHECK-NEXT: ushll2 v1.2d, v0.4s, #0 -; CHECK-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extaddu_v4i16_i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: uaddl v0.4s, v0.4h, v1.4h +; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0 +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extaddu_v4i16_i64: +; CHECK-GI: // %bb.0: // %entry +; 
CHECK-GI-NEXT: uaddl v1.4s, v0.4h, v1.4h +; CHECK-GI-NEXT: ushll v0.2d, v1.2s, #0 +; CHECK-GI-NEXT: ushll2 v1.2d, v1.4s, #0 +; CHECK-GI-NEXT: ret entry: %s0s = zext <4 x i16> %s0 to <4 x i64> %s1s = zext <4 x i16> %s1 to <4 x i64> @@ -304,15 +917,25 @@ entry: } define <8 x i64> @extadds_v8i16_i64(<8 x i16> %s0, <8 x i16> %s1) { -; CHECK-LABEL: extadds_v8i16_i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: saddl v2.4s, v0.4h, v1.4h -; CHECK-NEXT: saddl2 v4.4s, v0.8h, v1.8h -; CHECK-NEXT: sshll v0.2d, v2.2s, #0 -; CHECK-NEXT: sshll2 v3.2d, v4.4s, #0 -; CHECK-NEXT: sshll2 v1.2d, v2.4s, #0 -; CHECK-NEXT: sshll v2.2d, v4.2s, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extadds_v8i16_i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: saddl v2.4s, v0.4h, v1.4h +; CHECK-SD-NEXT: saddl2 v4.4s, v0.8h, v1.8h +; CHECK-SD-NEXT: sshll v0.2d, v2.2s, #0 +; CHECK-SD-NEXT: sshll2 v3.2d, v4.4s, #0 +; CHECK-SD-NEXT: sshll2 v1.2d, v2.4s, #0 +; CHECK-SD-NEXT: sshll v2.2d, v4.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extadds_v8i16_i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: saddl v2.4s, v0.4h, v1.4h +; CHECK-GI-NEXT: saddl2 v3.4s, v0.8h, v1.8h +; CHECK-GI-NEXT: sshll v0.2d, v2.2s, #0 +; CHECK-GI-NEXT: sshll2 v1.2d, v2.4s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0 +; CHECK-GI-NEXT: ret entry: %s0s = sext <8 x i16> %s0 to <8 x i64> %s1s = sext <8 x i16> %s1 to <8 x i64> @@ -321,15 +944,25 @@ entry: } define <8 x i64> @extaddu_v8i16_i64(<8 x i16> %s0, <8 x i16> %s1) { -; CHECK-LABEL: extaddu_v8i16_i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: uaddl v2.4s, v0.4h, v1.4h -; CHECK-NEXT: uaddl2 v4.4s, v0.8h, v1.8h -; CHECK-NEXT: ushll v0.2d, v2.2s, #0 -; CHECK-NEXT: ushll2 v3.2d, v4.4s, #0 -; CHECK-NEXT: ushll2 v1.2d, v2.4s, #0 -; CHECK-NEXT: ushll v2.2d, v4.2s, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extaddu_v8i16_i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: uaddl v2.4s, v0.4h, v1.4h +; CHECK-SD-NEXT: uaddl2 
v4.4s, v0.8h, v1.8h +; CHECK-SD-NEXT: ushll v0.2d, v2.2s, #0 +; CHECK-SD-NEXT: ushll2 v3.2d, v4.4s, #0 +; CHECK-SD-NEXT: ushll2 v1.2d, v2.4s, #0 +; CHECK-SD-NEXT: ushll v2.2d, v4.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extaddu_v8i16_i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: uaddl v2.4s, v0.4h, v1.4h +; CHECK-GI-NEXT: uaddl2 v3.4s, v0.8h, v1.8h +; CHECK-GI-NEXT: ushll v0.2d, v2.2s, #0 +; CHECK-GI-NEXT: ushll2 v1.2d, v2.4s, #0 +; CHECK-GI-NEXT: ushll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: ushll2 v3.2d, v3.4s, #0 +; CHECK-GI-NEXT: ret entry: %s0s = zext <8 x i16> %s0 to <8 x i64> %s1s = zext <8 x i16> %s1 to <8 x i64> @@ -337,6 +970,60 @@ entry: ret <8 x i64> %m } +define <8 x i64> @extsubs_v8i16_i64(<8 x i16> %s0, <8 x i16> %s1) { +; CHECK-SD-LABEL: extsubs_v8i16_i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ssubl v2.4s, v0.4h, v1.4h +; CHECK-SD-NEXT: ssubl2 v4.4s, v0.8h, v1.8h +; CHECK-SD-NEXT: sshll v0.2d, v2.2s, #0 +; CHECK-SD-NEXT: sshll2 v3.2d, v4.4s, #0 +; CHECK-SD-NEXT: sshll2 v1.2d, v2.4s, #0 +; CHECK-SD-NEXT: sshll v2.2d, v4.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extsubs_v8i16_i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ssubl v2.4s, v0.4h, v1.4h +; CHECK-GI-NEXT: ssubl2 v3.4s, v0.8h, v1.8h +; CHECK-GI-NEXT: sshll v0.2d, v2.2s, #0 +; CHECK-GI-NEXT: sshll2 v1.2d, v2.4s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0 +; CHECK-GI-NEXT: ret +entry: + %s0s = sext <8 x i16> %s0 to <8 x i64> + %s1s = sext <8 x i16> %s1 to <8 x i64> + %m = sub <8 x i64> %s0s, %s1s + ret <8 x i64> %m +} + +define <8 x i64> @extsubu_v8i16_i64(<8 x i16> %s0, <8 x i16> %s1) { +; CHECK-SD-LABEL: extsubu_v8i16_i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: usubl v2.4s, v0.4h, v1.4h +; CHECK-SD-NEXT: usubl2 v4.4s, v0.8h, v1.8h +; CHECK-SD-NEXT: sshll v0.2d, v2.2s, #0 +; CHECK-SD-NEXT: sshll2 v3.2d, v4.4s, #0 +; CHECK-SD-NEXT: sshll2 v1.2d, v2.4s, #0 +; CHECK-SD-NEXT: sshll v2.2d, v4.2s, #0 
+; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extsubu_v8i16_i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: usubl v2.4s, v0.4h, v1.4h +; CHECK-GI-NEXT: usubl2 v3.4s, v0.8h, v1.8h +; CHECK-GI-NEXT: sshll v0.2d, v2.2s, #0 +; CHECK-GI-NEXT: sshll2 v1.2d, v2.4s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0 +; CHECK-GI-NEXT: ret +entry: + %s0s = zext <8 x i16> %s0 to <8 x i64> + %s1s = zext <8 x i16> %s1 to <8 x i64> + %m = sub <8 x i64> %s0s, %s1s + ret <8 x i64> %m +} + define <2 x i64> @extadds_v2i32_i64(<2 x i32> %s0, <2 x i32> %s1) { ; CHECK-LABEL: extadds_v2i32_i64: ; CHECK: // %bb.0: // %entry @@ -362,12 +1049,19 @@ entry: } define <4 x i64> @extadds_v4i32_i64(<4 x i32> %s0, <4 x i32> %s1) { -; CHECK-LABEL: extadds_v4i32_i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: saddl2 v2.2d, v0.4s, v1.4s -; CHECK-NEXT: saddl v0.2d, v0.2s, v1.2s -; CHECK-NEXT: mov v1.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extadds_v4i32_i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: saddl2 v2.2d, v0.4s, v1.4s +; CHECK-SD-NEXT: saddl v0.2d, v0.2s, v1.2s +; CHECK-SD-NEXT: mov v1.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extadds_v4i32_i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: saddl v2.2d, v0.2s, v1.2s +; CHECK-GI-NEXT: saddl2 v1.2d, v0.4s, v1.4s +; CHECK-GI-NEXT: mov v0.16b, v2.16b +; CHECK-GI-NEXT: ret entry: %s0s = sext <4 x i32> %s0 to <4 x i64> %s1s = sext <4 x i32> %s1 to <4 x i64> @@ -376,12 +1070,19 @@ entry: } define <4 x i64> @extaddu_v4i32_i64(<4 x i32> %s0, <4 x i32> %s1) { -; CHECK-LABEL: extaddu_v4i32_i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: uaddl2 v2.2d, v0.4s, v1.4s -; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s -; CHECK-NEXT: mov v1.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extaddu_v4i32_i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: uaddl2 v2.2d, v0.4s, v1.4s +; CHECK-SD-NEXT: uaddl v0.2d, v0.2s, v1.2s +; CHECK-SD-NEXT: mov v1.16b, v2.16b +; CHECK-SD-NEXT: ret +; 
+; CHECK-GI-LABEL: extaddu_v4i32_i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: uaddl v2.2d, v0.2s, v1.2s +; CHECK-GI-NEXT: uaddl2 v1.2d, v0.4s, v1.4s +; CHECK-GI-NEXT: mov v0.16b, v2.16b +; CHECK-GI-NEXT: ret entry: %s0s = zext <4 x i32> %s0 to <4 x i64> %s1s = zext <4 x i32> %s1 to <4 x i64> @@ -390,16 +1091,26 @@ entry: } define <8 x i64> @extadds_v8i32_i64(<8 x i32> %s0, <8 x i32> %s1) { -; CHECK-LABEL: extadds_v8i32_i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: saddl2 v4.2d, v1.4s, v3.4s -; CHECK-NEXT: saddl v5.2d, v0.2s, v2.2s -; CHECK-NEXT: saddl2 v6.2d, v0.4s, v2.4s -; CHECK-NEXT: saddl v2.2d, v1.2s, v3.2s -; CHECK-NEXT: mov v0.16b, v5.16b -; CHECK-NEXT: mov v1.16b, v6.16b -; CHECK-NEXT: mov v3.16b, v4.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extadds_v8i32_i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: saddl2 v4.2d, v1.4s, v3.4s +; CHECK-SD-NEXT: saddl v5.2d, v0.2s, v2.2s +; CHECK-SD-NEXT: saddl2 v6.2d, v0.4s, v2.4s +; CHECK-SD-NEXT: saddl v2.2d, v1.2s, v3.2s +; CHECK-SD-NEXT: mov v0.16b, v5.16b +; CHECK-SD-NEXT: mov v1.16b, v6.16b +; CHECK-SD-NEXT: mov v3.16b, v4.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extadds_v8i32_i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: saddl v4.2d, v0.2s, v2.2s +; CHECK-GI-NEXT: saddl2 v5.2d, v0.4s, v2.4s +; CHECK-GI-NEXT: saddl v2.2d, v1.2s, v3.2s +; CHECK-GI-NEXT: saddl2 v3.2d, v1.4s, v3.4s +; CHECK-GI-NEXT: mov v0.16b, v4.16b +; CHECK-GI-NEXT: mov v1.16b, v5.16b +; CHECK-GI-NEXT: ret entry: %s0s = sext <8 x i32> %s0 to <8 x i64> %s1s = sext <8 x i32> %s1 to <8 x i64> @@ -408,16 +1119,26 @@ entry: } define <8 x i64> @extaddu_v8i32_i64(<8 x i32> %s0, <8 x i32> %s1) { -; CHECK-LABEL: extaddu_v8i32_i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: uaddl2 v4.2d, v1.4s, v3.4s -; CHECK-NEXT: uaddl v5.2d, v0.2s, v2.2s -; CHECK-NEXT: uaddl2 v6.2d, v0.4s, v2.4s -; CHECK-NEXT: uaddl v2.2d, v1.2s, v3.2s -; CHECK-NEXT: mov v0.16b, v5.16b -; CHECK-NEXT: mov v1.16b, v6.16b -; CHECK-NEXT: mov v3.16b, 
v4.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: extaddu_v8i32_i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: uaddl2 v4.2d, v1.4s, v3.4s +; CHECK-SD-NEXT: uaddl v5.2d, v0.2s, v2.2s +; CHECK-SD-NEXT: uaddl2 v6.2d, v0.4s, v2.4s +; CHECK-SD-NEXT: uaddl v2.2d, v1.2s, v3.2s +; CHECK-SD-NEXT: mov v0.16b, v5.16b +; CHECK-SD-NEXT: mov v1.16b, v6.16b +; CHECK-SD-NEXT: mov v3.16b, v4.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extaddu_v8i32_i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: uaddl v4.2d, v0.2s, v2.2s +; CHECK-GI-NEXT: uaddl2 v5.2d, v0.4s, v2.4s +; CHECK-GI-NEXT: uaddl v2.2d, v1.2s, v3.2s +; CHECK-GI-NEXT: uaddl2 v3.2d, v1.4s, v3.4s +; CHECK-GI-NEXT: mov v0.16b, v4.16b +; CHECK-GI-NEXT: mov v1.16b, v5.16b +; CHECK-GI-NEXT: ret entry: %s0s = zext <8 x i32> %s0 to <8 x i64> %s1s = zext <8 x i32> %s1 to <8 x i64> @@ -426,17 +1147,33 @@ entry: } define <16 x i32> @add_zs(<16 x i8> %s0, <16 x i8> %s1) { -; CHECK-LABEL: add_zs: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v2.8h, v0.8b, #0 -; CHECK-NEXT: ushll2 v0.8h, v0.16b, #0 -; CHECK-NEXT: saddw v2.8h, v2.8h, v1.8b -; CHECK-NEXT: saddw2 v4.8h, v0.8h, v1.16b -; CHECK-NEXT: sshll v0.4s, v2.4h, #0 -; CHECK-NEXT: sshll2 v3.4s, v4.8h, #0 -; CHECK-NEXT: sshll2 v1.4s, v2.8h, #0 -; CHECK-NEXT: sshll v2.4s, v4.4h, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: add_zs: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ushll v2.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll2 v0.8h, v0.16b, #0 +; CHECK-SD-NEXT: saddw v2.8h, v2.8h, v1.8b +; CHECK-SD-NEXT: saddw2 v4.8h, v0.8h, v1.16b +; CHECK-SD-NEXT: sshll v0.4s, v2.4h, #0 +; CHECK-SD-NEXT: sshll2 v3.4s, v4.8h, #0 +; CHECK-SD-NEXT: sshll2 v1.4s, v2.8h, #0 +; CHECK-SD-NEXT: sshll v2.4s, v4.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: add_zs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sshll v2.8h, v1.8b, #0 +; CHECK-GI-NEXT: sshll2 v1.8h, v1.16b, #0 +; CHECK-GI-NEXT: ushll v3.8h, v0.8b, #0 +; CHECK-GI-NEXT: ushll2 v4.8h, v0.16b, #0 +; CHECK-GI-NEXT: sshll 
v0.4s, v2.4h, #0 +; CHECK-GI-NEXT: sshll2 v2.4s, v2.8h, #0 +; CHECK-GI-NEXT: sshll v5.4s, v1.4h, #0 +; CHECK-GI-NEXT: sshll2 v6.4s, v1.8h, #0 +; CHECK-GI-NEXT: uaddw v0.4s, v0.4s, v3.4h +; CHECK-GI-NEXT: uaddw2 v1.4s, v2.4s, v3.8h +; CHECK-GI-NEXT: uaddw v2.4s, v5.4s, v4.4h +; CHECK-GI-NEXT: uaddw2 v3.4s, v6.4s, v4.8h +; CHECK-GI-NEXT: ret entry: %s0s = zext <16 x i8> %s0 to <16 x i32> %s1s = sext <16 x i8> %s1 to <16 x i32> @@ -445,87 +1182,180 @@ entry: } define <20 x i32> @v20(<20 x i8> %s0, <20 x i8> %s1) { -; CHECK-LABEL: v20: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr b2, [sp, #160] -; CHECK-NEXT: add x10, sp, #168 -; CHECK-NEXT: ldr b3, [sp] -; CHECK-NEXT: add x11, sp, #8 -; CHECK-NEXT: ldr b1, [sp, #96] -; CHECK-NEXT: ld1 { v2.b }[1], [x10] -; CHECK-NEXT: add x9, sp, #104 -; CHECK-NEXT: add x10, sp, #176 -; CHECK-NEXT: mov v0.b[1], w1 -; CHECK-NEXT: ld1 { v3.b }[1], [x11] -; CHECK-NEXT: ld1 { v1.b }[1], [x9] -; CHECK-NEXT: add x11, sp, #16 -; CHECK-NEXT: add x9, sp, #112 -; CHECK-NEXT: add x13, sp, #184 -; CHECK-NEXT: ld1 { v2.b }[2], [x10] -; CHECK-NEXT: add x12, sp, #120 -; CHECK-NEXT: add x14, sp, #32 -; CHECK-NEXT: ld1 { v3.b }[2], [x11] -; CHECK-NEXT: ld1 { v1.b }[2], [x9] -; CHECK-NEXT: ldr b5, [sp, #64] -; CHECK-NEXT: mov v0.b[2], w2 -; CHECK-NEXT: ldr b4, [sp, #224] -; CHECK-NEXT: add x11, sp, #128 -; CHECK-NEXT: ld1 { v2.b }[3], [x13] -; CHECK-NEXT: add x13, sp, #24 -; CHECK-NEXT: add x10, sp, #136 -; CHECK-NEXT: ld1 { v3.b }[3], [x13] -; CHECK-NEXT: ld1 { v1.b }[3], [x12] -; CHECK-NEXT: add x12, sp, #192 -; CHECK-NEXT: add x13, sp, #200 -; CHECK-NEXT: add x15, sp, #80 -; CHECK-NEXT: add x9, sp, #144 -; CHECK-NEXT: mov v0.b[3], w3 -; CHECK-NEXT: ld1 { v2.b }[4], [x12] -; CHECK-NEXT: add x12, sp, #232 -; CHECK-NEXT: ld1 { v3.b }[4], [x14] -; CHECK-NEXT: add x14, sp, #72 -; CHECK-NEXT: ld1 { v4.b }[1], [x12] -; CHECK-NEXT: ld1 { v5.b }[1], [x14] -; CHECK-NEXT: add x14, sp, #40 -; CHECK-NEXT: ld1 { v1.b }[4], [x11] -; 
CHECK-NEXT: ld1 { v2.b }[5], [x13] -; CHECK-NEXT: add x12, sp, #208 -; CHECK-NEXT: add x13, sp, #48 -; CHECK-NEXT: mov v0.b[4], w4 -; CHECK-NEXT: ld1 { v3.b }[5], [x14] -; CHECK-NEXT: add x14, sp, #240 -; CHECK-NEXT: ld1 { v4.b }[2], [x14] -; CHECK-NEXT: ld1 { v5.b }[2], [x15] -; CHECK-NEXT: ld1 { v1.b }[5], [x10] -; CHECK-NEXT: ld1 { v2.b }[6], [x12] -; CHECK-NEXT: add x11, sp, #216 -; CHECK-NEXT: add x10, sp, #56 -; CHECK-NEXT: ld1 { v3.b }[6], [x13] -; CHECK-NEXT: add x12, sp, #248 -; CHECK-NEXT: add x13, sp, #88 -; CHECK-NEXT: mov v0.b[5], w5 -; CHECK-NEXT: ld1 { v4.b }[3], [x12] -; CHECK-NEXT: ld1 { v5.b }[3], [x13] -; CHECK-NEXT: ld1 { v1.b }[6], [x9] -; CHECK-NEXT: ld1 { v2.b }[7], [x11] -; CHECK-NEXT: add x9, sp, #152 -; CHECK-NEXT: ld1 { v3.b }[7], [x10] -; CHECK-NEXT: uaddl v4.8h, v5.8b, v4.8b -; CHECK-NEXT: mov v0.b[6], w6 -; CHECK-NEXT: ld1 { v1.b }[7], [x9] -; CHECK-NEXT: uaddl v2.8h, v3.8b, v2.8b -; CHECK-NEXT: ushll v3.4s, v4.4h, #0 -; CHECK-NEXT: mov v0.b[7], w7 -; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b -; CHECK-NEXT: ushll2 v1.4s, v2.8h, #0 -; CHECK-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-NEXT: stp q1, q3, [x8, #48] -; CHECK-NEXT: ushll2 v3.4s, v0.8h, #0 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: stp q3, q2, [x8, #16] -; CHECK-NEXT: str q0, [x8] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v20: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: ldr b2, [sp, #160] +; CHECK-SD-NEXT: add x10, sp, #168 +; CHECK-SD-NEXT: ldr b3, [sp] +; CHECK-SD-NEXT: add x11, sp, #8 +; CHECK-SD-NEXT: ldr b1, [sp, #96] +; CHECK-SD-NEXT: ld1 { v2.b }[1], [x10] +; CHECK-SD-NEXT: add x9, sp, #104 +; CHECK-SD-NEXT: add x10, sp, #176 +; CHECK-SD-NEXT: mov v0.b[1], w1 +; CHECK-SD-NEXT: ld1 { v3.b }[1], [x11] +; CHECK-SD-NEXT: ld1 { v1.b }[1], [x9] +; CHECK-SD-NEXT: add x11, sp, #16 +; CHECK-SD-NEXT: add x9, sp, #112 +; CHECK-SD-NEXT: add x13, sp, #184 +; CHECK-SD-NEXT: ld1 { v2.b }[2], [x10] +; CHECK-SD-NEXT: add x12, sp, #120 +; CHECK-SD-NEXT: 
add x14, sp, #32 +; CHECK-SD-NEXT: ld1 { v3.b }[2], [x11] +; CHECK-SD-NEXT: ld1 { v1.b }[2], [x9] +; CHECK-SD-NEXT: ldr b5, [sp, #64] +; CHECK-SD-NEXT: mov v0.b[2], w2 +; CHECK-SD-NEXT: ldr b4, [sp, #224] +; CHECK-SD-NEXT: add x11, sp, #128 +; CHECK-SD-NEXT: ld1 { v2.b }[3], [x13] +; CHECK-SD-NEXT: add x13, sp, #24 +; CHECK-SD-NEXT: add x10, sp, #136 +; CHECK-SD-NEXT: ld1 { v3.b }[3], [x13] +; CHECK-SD-NEXT: ld1 { v1.b }[3], [x12] +; CHECK-SD-NEXT: add x12, sp, #192 +; CHECK-SD-NEXT: add x13, sp, #200 +; CHECK-SD-NEXT: add x15, sp, #80 +; CHECK-SD-NEXT: add x9, sp, #144 +; CHECK-SD-NEXT: mov v0.b[3], w3 +; CHECK-SD-NEXT: ld1 { v2.b }[4], [x12] +; CHECK-SD-NEXT: add x12, sp, #232 +; CHECK-SD-NEXT: ld1 { v3.b }[4], [x14] +; CHECK-SD-NEXT: add x14, sp, #72 +; CHECK-SD-NEXT: ld1 { v4.b }[1], [x12] +; CHECK-SD-NEXT: ld1 { v5.b }[1], [x14] +; CHECK-SD-NEXT: add x14, sp, #40 +; CHECK-SD-NEXT: ld1 { v1.b }[4], [x11] +; CHECK-SD-NEXT: ld1 { v2.b }[5], [x13] +; CHECK-SD-NEXT: add x12, sp, #208 +; CHECK-SD-NEXT: add x13, sp, #48 +; CHECK-SD-NEXT: mov v0.b[4], w4 +; CHECK-SD-NEXT: ld1 { v3.b }[5], [x14] +; CHECK-SD-NEXT: add x14, sp, #240 +; CHECK-SD-NEXT: ld1 { v4.b }[2], [x14] +; CHECK-SD-NEXT: ld1 { v5.b }[2], [x15] +; CHECK-SD-NEXT: ld1 { v1.b }[5], [x10] +; CHECK-SD-NEXT: ld1 { v2.b }[6], [x12] +; CHECK-SD-NEXT: add x11, sp, #216 +; CHECK-SD-NEXT: add x10, sp, #56 +; CHECK-SD-NEXT: ld1 { v3.b }[6], [x13] +; CHECK-SD-NEXT: add x12, sp, #248 +; CHECK-SD-NEXT: add x13, sp, #88 +; CHECK-SD-NEXT: mov v0.b[5], w5 +; CHECK-SD-NEXT: ld1 { v4.b }[3], [x12] +; CHECK-SD-NEXT: ld1 { v5.b }[3], [x13] +; CHECK-SD-NEXT: ld1 { v1.b }[6], [x9] +; CHECK-SD-NEXT: ld1 { v2.b }[7], [x11] +; CHECK-SD-NEXT: add x9, sp, #152 +; CHECK-SD-NEXT: ld1 { v3.b }[7], [x10] +; CHECK-SD-NEXT: uaddl v4.8h, v5.8b, v4.8b +; CHECK-SD-NEXT: mov v0.b[6], w6 +; CHECK-SD-NEXT: ld1 { v1.b }[7], [x9] +; CHECK-SD-NEXT: uaddl v2.8h, v3.8b, v2.8b +; CHECK-SD-NEXT: ushll v3.4s, v4.4h, #0 +; CHECK-SD-NEXT: mov v0.b[7], 
w7 +; CHECK-SD-NEXT: uaddl v0.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: ushll2 v1.4s, v2.8h, #0 +; CHECK-SD-NEXT: ushll v2.4s, v2.4h, #0 +; CHECK-SD-NEXT: stp q1, q3, [x8, #48] +; CHECK-SD-NEXT: ushll2 v3.4s, v0.8h, #0 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: stp q3, q2, [x8, #16] +; CHECK-SD-NEXT: str q0, [x8] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v20: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ldr s0, [sp] +; CHECK-GI-NEXT: ldr s4, [sp, #8] +; CHECK-GI-NEXT: fmov s1, w0 +; CHECK-GI-NEXT: ldr s2, [sp, #32] +; CHECK-GI-NEXT: ldr s19, [sp, #40] +; CHECK-GI-NEXT: fmov s3, w4 +; CHECK-GI-NEXT: mov v0.s[1], v4.s[0] +; CHECK-GI-NEXT: ldr s16, [sp, #96] +; CHECK-GI-NEXT: ldr s22, [sp, #104] +; CHECK-GI-NEXT: mov v2.s[1], v19.s[0] +; CHECK-GI-NEXT: ldr s19, [sp, #128] +; CHECK-GI-NEXT: ldr s23, [sp, #136] +; CHECK-GI-NEXT: ldr s18, [sp, #16] +; CHECK-GI-NEXT: mov v1.s[1], w1 +; CHECK-GI-NEXT: mov v3.s[1], w5 +; CHECK-GI-NEXT: mov v16.s[1], v22.s[0] +; CHECK-GI-NEXT: mov v19.s[1], v23.s[0] +; CHECK-GI-NEXT: ldr s4, [sp, #64] +; CHECK-GI-NEXT: ldr s21, [sp, #72] +; CHECK-GI-NEXT: mov v0.s[2], v18.s[0] +; CHECK-GI-NEXT: ldr s18, [sp, #160] +; CHECK-GI-NEXT: ldr s24, [sp, #168] +; CHECK-GI-NEXT: ldr s20, [sp, #192] +; CHECK-GI-NEXT: ldr s25, [sp, #200] +; CHECK-GI-NEXT: ldr s22, [sp, #224] +; CHECK-GI-NEXT: ldr s27, [sp, #232] +; CHECK-GI-NEXT: ldr s23, [sp, #112] +; CHECK-GI-NEXT: ldr s26, [sp, #144] +; CHECK-GI-NEXT: mov v18.s[1], v24.s[0] +; CHECK-GI-NEXT: mov v20.s[1], v25.s[0] +; CHECK-GI-NEXT: mov v4.s[1], v21.s[0] +; CHECK-GI-NEXT: mov v22.s[1], v27.s[0] +; CHECK-GI-NEXT: mov v1.s[2], w2 +; CHECK-GI-NEXT: ldr s17, [sp, #48] +; CHECK-GI-NEXT: mov v3.s[2], w6 +; CHECK-GI-NEXT: mov v16.s[2], v23.s[0] +; CHECK-GI-NEXT: mov v19.s[2], v26.s[0] +; CHECK-GI-NEXT: ldr s7, [sp, #80] +; CHECK-GI-NEXT: ldr s21, [sp, #176] +; CHECK-GI-NEXT: ldr s24, [sp, #208] +; CHECK-GI-NEXT: ldr s25, [sp, #240] +; CHECK-GI-NEXT: mov v2.s[2], v17.s[0] +; CHECK-GI-NEXT: 
ldr s17, [sp, #120] +; CHECK-GI-NEXT: ldr s23, [sp, #152] +; CHECK-GI-NEXT: ldr s5, [sp, #24] +; CHECK-GI-NEXT: mov v18.s[2], v21.s[0] +; CHECK-GI-NEXT: mov v20.s[2], v24.s[0] +; CHECK-GI-NEXT: mov v4.s[2], v7.s[0] +; CHECK-GI-NEXT: mov v22.s[2], v25.s[0] +; CHECK-GI-NEXT: mov v1.s[3], w3 +; CHECK-GI-NEXT: mov v3.s[3], w7 +; CHECK-GI-NEXT: mov v16.s[3], v17.s[0] +; CHECK-GI-NEXT: mov v19.s[3], v23.s[0] +; CHECK-GI-NEXT: ldr s6, [sp, #56] +; CHECK-GI-NEXT: ldr s7, [sp, #184] +; CHECK-GI-NEXT: ldr s21, [sp, #216] +; CHECK-GI-NEXT: ldr s17, [sp, #88] +; CHECK-GI-NEXT: mov v0.s[3], v5.s[0] +; CHECK-GI-NEXT: ldr s5, [sp, #248] +; CHECK-GI-NEXT: mov v2.s[3], v6.s[0] +; CHECK-GI-NEXT: mov v18.s[3], v7.s[0] +; CHECK-GI-NEXT: mov v20.s[3], v21.s[0] +; CHECK-GI-NEXT: mov v4.s[3], v17.s[0] +; CHECK-GI-NEXT: mov v22.s[3], v5.s[0] +; CHECK-GI-NEXT: uzp1 v1.8h, v1.8h, v3.8h +; CHECK-GI-NEXT: movi v3.2d, #0xff00ff00ff00ff +; CHECK-GI-NEXT: uzp1 v5.8h, v16.8h, v19.8h +; CHECK-GI-NEXT: dup v6.4s, w8 +; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: uzp1 v2.8h, v18.8h, v20.8h +; CHECK-GI-NEXT: uzp1 v4.8h, v4.8h, v6.8h +; CHECK-GI-NEXT: uzp1 v6.8h, v22.8h, v6.8h +; CHECK-GI-NEXT: and v1.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: and v5.16b, v5.16b, v3.16b +; CHECK-GI-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b +; CHECK-GI-NEXT: add v1.8h, v1.8h, v5.8h +; CHECK-GI-NEXT: and v4.16b, v4.16b, v3.16b +; CHECK-GI-NEXT: and v3.16b, v6.16b, v3.16b +; CHECK-GI-NEXT: add v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: ushll v2.4s, v1.4h, #0 +; CHECK-GI-NEXT: add v3.4h, v4.4h, v3.4h +; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0 +; CHECK-GI-NEXT: ushll v4.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0 +; CHECK-GI-NEXT: stp q2, q1, [x8] +; CHECK-GI-NEXT: ushll v2.4s, v3.4h, #0 +; CHECK-GI-NEXT: stp q4, q0, [x8, #32] +; CHECK-GI-NEXT: str q2, [x8, #64] +; CHECK-GI-NEXT: ret entry: %s0s = zext <20 x i8> %s0 to <20 x i32> %s1s = zext <20 x i8> %s1 to <20 x i32> @@ 
-534,98 +1364,165 @@ entry: } define <16 x i32> @i12(<16 x i12> %s0, <16 x i12> %s1) { -; CHECK-LABEL: i12: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x23, [sp, #-48]! // 8-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w21, -24 -; CHECK-NEXT: .cfi_offset w22, -32 -; CHECK-NEXT: .cfi_offset w23, -48 -; CHECK-NEXT: ldr w13, [sp, #112] -; CHECK-NEXT: ldr w14, [sp, #144] -; CHECK-NEXT: fmov s2, w4 -; CHECK-NEXT: ldr w17, [sp, #176] -; CHECK-NEXT: ldr w19, [sp, #208] -; CHECK-NEXT: fmov s3, w0 -; CHECK-NEXT: ldr w20, [sp, #80] -; CHECK-NEXT: ldr w21, [sp, #48] -; CHECK-NEXT: fmov s5, w13 -; CHECK-NEXT: fmov s4, w19 -; CHECK-NEXT: fmov s6, w17 -; CHECK-NEXT: fmov s7, w14 -; CHECK-NEXT: fmov s0, w20 -; CHECK-NEXT: fmov s1, w21 -; CHECK-NEXT: ldr w10, [sp, #120] -; CHECK-NEXT: ldr w11, [sp, #152] -; CHECK-NEXT: ldr w12, [sp, #184] -; CHECK-NEXT: ldr w15, [sp, #216] -; CHECK-NEXT: ldr w22, [sp, #88] -; CHECK-NEXT: ldr w23, [sp, #56] -; CHECK-NEXT: mov v2.h[1], w5 -; CHECK-NEXT: mov v3.h[1], w1 -; CHECK-NEXT: mov v5.h[1], w10 -; CHECK-NEXT: mov v4.h[1], w15 -; CHECK-NEXT: mov v0.h[1], w22 -; CHECK-NEXT: mov v1.h[1], w23 -; CHECK-NEXT: mov v6.h[1], w12 -; CHECK-NEXT: mov v7.h[1], w11 -; CHECK-NEXT: ldr w8, [sp, #128] -; CHECK-NEXT: ldr w9, [sp, #160] -; CHECK-NEXT: ldr w16, [sp, #64] -; CHECK-NEXT: ldr w18, [sp, #96] -; CHECK-NEXT: ldr w10, [sp, #192] -; CHECK-NEXT: ldr w11, [sp, #224] -; CHECK-NEXT: mov v2.h[2], w6 -; CHECK-NEXT: mov v3.h[2], w2 -; CHECK-NEXT: mov v0.h[2], w18 -; CHECK-NEXT: mov v1.h[2], w16 -; CHECK-NEXT: mov v5.h[2], w8 -; CHECK-NEXT: mov v4.h[2], w11 -; CHECK-NEXT: mov v6.h[2], w10 -; CHECK-NEXT: mov v7.h[2], w9 -; CHECK-NEXT: ldr w12, [sp, #72] -; CHECK-NEXT: ldr w13, [sp, #104] -; CHECK-NEXT: ldr w8, [sp, #136] -; 
CHECK-NEXT: ldr w9, [sp, #168] -; CHECK-NEXT: ldr w10, [sp, #200] -; CHECK-NEXT: ldr w11, [sp, #232] -; CHECK-NEXT: mov v0.h[3], w13 -; CHECK-NEXT: mov v1.h[3], w12 -; CHECK-NEXT: mov v2.h[3], w7 -; CHECK-NEXT: mov v3.h[3], w3 -; CHECK-NEXT: mov v5.h[3], w8 -; CHECK-NEXT: mov v4.h[3], w11 -; CHECK-NEXT: mov v6.h[3], w10 -; CHECK-NEXT: mov v7.h[3], w9 -; CHECK-NEXT: movi v16.4s, #15, msl #8 -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-NEXT: ushll v3.4s, v3.4h, #0 -; CHECK-NEXT: ushll v5.4s, v5.4h, #0 -; CHECK-NEXT: ushll v4.4s, v4.4h, #0 -; CHECK-NEXT: ushll v6.4s, v6.4h, #0 -; CHECK-NEXT: ushll v7.4s, v7.4h, #0 -; CHECK-NEXT: and v17.16b, v0.16b, v16.16b -; CHECK-NEXT: and v18.16b, v1.16b, v16.16b -; CHECK-NEXT: and v1.16b, v2.16b, v16.16b -; CHECK-NEXT: and v0.16b, v3.16b, v16.16b -; CHECK-NEXT: and v2.16b, v5.16b, v16.16b -; CHECK-NEXT: and v3.16b, v4.16b, v16.16b -; CHECK-NEXT: and v4.16b, v6.16b, v16.16b -; CHECK-NEXT: and v5.16b, v7.16b, v16.16b -; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add v0.4s, v0.4s, v2.4s -; CHECK-NEXT: add v3.4s, v17.4s, v3.4s -; CHECK-NEXT: add v1.4s, v1.4s, v5.4s -; CHECK-NEXT: add v2.4s, v18.4s, v4.4s -; CHECK-NEXT: ldr x23, [sp], #48 // 8-byte Folded Reload -; CHECK-NEXT: ret +; CHECK-SD-LABEL: i12: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: str x23, [sp, #-48]! 
// 8-byte Folded Spill +; CHECK-SD-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w21, -24 +; CHECK-SD-NEXT: .cfi_offset w22, -32 +; CHECK-SD-NEXT: .cfi_offset w23, -48 +; CHECK-SD-NEXT: ldr w13, [sp, #112] +; CHECK-SD-NEXT: ldr w14, [sp, #144] +; CHECK-SD-NEXT: fmov s2, w4 +; CHECK-SD-NEXT: ldr w17, [sp, #176] +; CHECK-SD-NEXT: ldr w19, [sp, #208] +; CHECK-SD-NEXT: fmov s3, w0 +; CHECK-SD-NEXT: ldr w20, [sp, #80] +; CHECK-SD-NEXT: ldr w21, [sp, #48] +; CHECK-SD-NEXT: fmov s5, w13 +; CHECK-SD-NEXT: fmov s4, w19 +; CHECK-SD-NEXT: fmov s6, w17 +; CHECK-SD-NEXT: fmov s7, w14 +; CHECK-SD-NEXT: fmov s0, w20 +; CHECK-SD-NEXT: fmov s1, w21 +; CHECK-SD-NEXT: ldr w10, [sp, #120] +; CHECK-SD-NEXT: ldr w11, [sp, #152] +; CHECK-SD-NEXT: ldr w12, [sp, #184] +; CHECK-SD-NEXT: ldr w15, [sp, #216] +; CHECK-SD-NEXT: ldr w22, [sp, #88] +; CHECK-SD-NEXT: ldr w23, [sp, #56] +; CHECK-SD-NEXT: mov v2.h[1], w5 +; CHECK-SD-NEXT: mov v3.h[1], w1 +; CHECK-SD-NEXT: mov v5.h[1], w10 +; CHECK-SD-NEXT: mov v4.h[1], w15 +; CHECK-SD-NEXT: mov v0.h[1], w22 +; CHECK-SD-NEXT: mov v1.h[1], w23 +; CHECK-SD-NEXT: mov v6.h[1], w12 +; CHECK-SD-NEXT: mov v7.h[1], w11 +; CHECK-SD-NEXT: ldr w8, [sp, #128] +; CHECK-SD-NEXT: ldr w9, [sp, #160] +; CHECK-SD-NEXT: ldr w16, [sp, #64] +; CHECK-SD-NEXT: ldr w18, [sp, #96] +; CHECK-SD-NEXT: ldr w10, [sp, #192] +; CHECK-SD-NEXT: ldr w11, [sp, #224] +; CHECK-SD-NEXT: mov v2.h[2], w6 +; CHECK-SD-NEXT: mov v3.h[2], w2 +; CHECK-SD-NEXT: mov v0.h[2], w18 +; CHECK-SD-NEXT: mov v1.h[2], w16 +; CHECK-SD-NEXT: mov v5.h[2], w8 +; CHECK-SD-NEXT: mov v4.h[2], w11 +; CHECK-SD-NEXT: mov v6.h[2], w10 +; CHECK-SD-NEXT: mov v7.h[2], w9 +; CHECK-SD-NEXT: ldr w12, [sp, #72] +; CHECK-SD-NEXT: ldr w13, [sp, #104] +; CHECK-SD-NEXT: ldr w8, [sp, #136] +; CHECK-SD-NEXT: ldr 
w9, [sp, #168] +; CHECK-SD-NEXT: ldr w10, [sp, #200] +; CHECK-SD-NEXT: ldr w11, [sp, #232] +; CHECK-SD-NEXT: mov v0.h[3], w13 +; CHECK-SD-NEXT: mov v1.h[3], w12 +; CHECK-SD-NEXT: mov v2.h[3], w7 +; CHECK-SD-NEXT: mov v3.h[3], w3 +; CHECK-SD-NEXT: mov v5.h[3], w8 +; CHECK-SD-NEXT: mov v4.h[3], w11 +; CHECK-SD-NEXT: mov v6.h[3], w10 +; CHECK-SD-NEXT: mov v7.h[3], w9 +; CHECK-SD-NEXT: movi v16.4s, #15, msl #8 +; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-SD-NEXT: ushll v2.4s, v2.4h, #0 +; CHECK-SD-NEXT: ushll v3.4s, v3.4h, #0 +; CHECK-SD-NEXT: ushll v5.4s, v5.4h, #0 +; CHECK-SD-NEXT: ushll v4.4s, v4.4h, #0 +; CHECK-SD-NEXT: ushll v6.4s, v6.4h, #0 +; CHECK-SD-NEXT: ushll v7.4s, v7.4h, #0 +; CHECK-SD-NEXT: and v17.16b, v0.16b, v16.16b +; CHECK-SD-NEXT: and v18.16b, v1.16b, v16.16b +; CHECK-SD-NEXT: and v1.16b, v2.16b, v16.16b +; CHECK-SD-NEXT: and v0.16b, v3.16b, v16.16b +; CHECK-SD-NEXT: and v2.16b, v5.16b, v16.16b +; CHECK-SD-NEXT: and v3.16b, v4.16b, v16.16b +; CHECK-SD-NEXT: and v4.16b, v6.16b, v16.16b +; CHECK-SD-NEXT: and v5.16b, v7.16b, v16.16b +; CHECK-SD-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-SD-NEXT: add v3.4s, v17.4s, v3.4s +; CHECK-SD-NEXT: add v1.4s, v1.4s, v5.4s +; CHECK-SD-NEXT: add v2.4s, v18.4s, v4.4s +; CHECK-SD-NEXT: ldr x23, [sp], #48 // 8-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i12: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fmov s1, w0 +; CHECK-GI-NEXT: fmov s4, w4 +; CHECK-GI-NEXT: ldr s0, [sp] +; CHECK-GI-NEXT: ldr s20, [sp, #8] +; CHECK-GI-NEXT: ldr s2, [sp, #32] +; CHECK-GI-NEXT: ldr s21, [sp, #40] +; CHECK-GI-NEXT: ldr s16, [sp, #64] +; CHECK-GI-NEXT: ldr s22, [sp, #72] +; CHECK-GI-NEXT: ldr s17, [sp, #96] +; CHECK-GI-NEXT: ldr s23, [sp, #104] +; CHECK-GI-NEXT: mov v1.s[1], w1 +; CHECK-GI-NEXT: mov v4.s[1], w5 +; CHECK-GI-NEXT: ldr s18, 
[sp, #128] +; CHECK-GI-NEXT: ldr s24, [sp, #136] +; CHECK-GI-NEXT: mov v0.s[1], v20.s[0] +; CHECK-GI-NEXT: ldr s19, [sp, #160] +; CHECK-GI-NEXT: ldr s25, [sp, #168] +; CHECK-GI-NEXT: mov v2.s[1], v21.s[0] +; CHECK-GI-NEXT: mov v16.s[1], v22.s[0] +; CHECK-GI-NEXT: mov v17.s[1], v23.s[0] +; CHECK-GI-NEXT: mov v18.s[1], v24.s[0] +; CHECK-GI-NEXT: mov v19.s[1], v25.s[0] +; CHECK-GI-NEXT: ldr s6, [sp, #16] +; CHECK-GI-NEXT: ldr s7, [sp, #48] +; CHECK-GI-NEXT: ldr s20, [sp, #80] +; CHECK-GI-NEXT: ldr s21, [sp, #112] +; CHECK-GI-NEXT: ldr s22, [sp, #144] +; CHECK-GI-NEXT: ldr s23, [sp, #176] +; CHECK-GI-NEXT: mov v1.s[2], w2 +; CHECK-GI-NEXT: mov v4.s[2], w6 +; CHECK-GI-NEXT: mov v0.s[2], v6.s[0] +; CHECK-GI-NEXT: mov v2.s[2], v7.s[0] +; CHECK-GI-NEXT: mov v16.s[2], v20.s[0] +; CHECK-GI-NEXT: mov v17.s[2], v21.s[0] +; CHECK-GI-NEXT: mov v18.s[2], v22.s[0] +; CHECK-GI-NEXT: mov v19.s[2], v23.s[0] +; CHECK-GI-NEXT: ldr s3, [sp, #24] +; CHECK-GI-NEXT: ldr s5, [sp, #56] +; CHECK-GI-NEXT: ldr s6, [sp, #88] +; CHECK-GI-NEXT: ldr s7, [sp, #120] +; CHECK-GI-NEXT: ldr s20, [sp, #152] +; CHECK-GI-NEXT: ldr s21, [sp, #184] +; CHECK-GI-NEXT: mov v1.s[3], w3 +; CHECK-GI-NEXT: mov v4.s[3], w7 +; CHECK-GI-NEXT: movi v22.4s, #15, msl #8 +; CHECK-GI-NEXT: mov v0.s[3], v3.s[0] +; CHECK-GI-NEXT: mov v2.s[3], v5.s[0] +; CHECK-GI-NEXT: mov v16.s[3], v6.s[0] +; CHECK-GI-NEXT: mov v17.s[3], v7.s[0] +; CHECK-GI-NEXT: mov v18.s[3], v20.s[0] +; CHECK-GI-NEXT: mov v19.s[3], v21.s[0] +; CHECK-GI-NEXT: and v1.16b, v1.16b, v22.16b +; CHECK-GI-NEXT: and v3.16b, v4.16b, v22.16b +; CHECK-GI-NEXT: and v4.16b, v0.16b, v22.16b +; CHECK-GI-NEXT: and v5.16b, v2.16b, v22.16b +; CHECK-GI-NEXT: and v0.16b, v16.16b, v22.16b +; CHECK-GI-NEXT: and v2.16b, v17.16b, v22.16b +; CHECK-GI-NEXT: and v6.16b, v18.16b, v22.16b +; CHECK-GI-NEXT: and v7.16b, v19.16b, v22.16b +; CHECK-GI-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: add v1.4s, v3.4s, v2.4s +; CHECK-GI-NEXT: add v2.4s, v4.4s, v6.4s +; CHECK-GI-NEXT: add 
v3.4s, v5.4s, v7.4s +; CHECK-GI-NEXT: ret entry: %s0s = zext <16 x i12> %s0 to <16 x i32> %s1s = zext <16 x i12> %s1 to <16 x i32> @@ -634,15 +1531,25 @@ entry: } define <16 x i32> @sub_zz(<16 x i8> %s0, <16 x i8> %s1) { -; CHECK-LABEL: sub_zz: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: usubl v2.8h, v0.8b, v1.8b -; CHECK-NEXT: usubl2 v4.8h, v0.16b, v1.16b -; CHECK-NEXT: sshll v0.4s, v2.4h, #0 -; CHECK-NEXT: sshll2 v3.4s, v4.8h, #0 -; CHECK-NEXT: sshll2 v1.4s, v2.8h, #0 -; CHECK-NEXT: sshll v2.4s, v4.4h, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sub_zz: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: usubl v2.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: usubl2 v4.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: sshll v0.4s, v2.4h, #0 +; CHECK-SD-NEXT: sshll2 v3.4s, v4.8h, #0 +; CHECK-SD-NEXT: sshll2 v1.4s, v2.8h, #0 +; CHECK-SD-NEXT: sshll v2.4s, v4.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sub_zz: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: usubl v2.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: usubl2 v3.8h, v0.16b, v1.16b +; CHECK-GI-NEXT: sshll v0.4s, v2.4h, #0 +; CHECK-GI-NEXT: sshll2 v1.4s, v2.8h, #0 +; CHECK-GI-NEXT: sshll v2.4s, v3.4h, #0 +; CHECK-GI-NEXT: sshll2 v3.4s, v3.8h, #0 +; CHECK-GI-NEXT: ret entry: %s0s = zext <16 x i8> %s0 to <16 x i32> %s1s = zext <16 x i8> %s1 to <16 x i32> @@ -651,15 +1558,25 @@ entry: } define <16 x i32> @sub_ss(<16 x i8> %s0, <16 x i8> %s1) { -; CHECK-LABEL: sub_ss: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ssubl v2.8h, v0.8b, v1.8b -; CHECK-NEXT: ssubl2 v4.8h, v0.16b, v1.16b -; CHECK-NEXT: sshll v0.4s, v2.4h, #0 -; CHECK-NEXT: sshll2 v3.4s, v4.8h, #0 -; CHECK-NEXT: sshll2 v1.4s, v2.8h, #0 -; CHECK-NEXT: sshll v2.4s, v4.4h, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sub_ss: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ssubl v2.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: ssubl2 v4.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: sshll v0.4s, v2.4h, #0 +; CHECK-SD-NEXT: sshll2 v3.4s, v4.8h, #0 +; CHECK-SD-NEXT: sshll2 v1.4s, v2.8h, #0 +; CHECK-SD-NEXT: 
sshll v2.4s, v4.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sub_ss: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ssubl v2.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: ssubl2 v3.8h, v0.16b, v1.16b +; CHECK-GI-NEXT: sshll v0.4s, v2.4h, #0 +; CHECK-GI-NEXT: sshll2 v1.4s, v2.8h, #0 +; CHECK-GI-NEXT: sshll v2.4s, v3.4h, #0 +; CHECK-GI-NEXT: sshll2 v3.4s, v3.8h, #0 +; CHECK-GI-NEXT: ret entry: %s0s = sext <16 x i8> %s0 to <16 x i32> %s1s = sext <16 x i8> %s1 to <16 x i32> @@ -668,17 +1585,33 @@ entry: } define <16 x i32> @sub_zs(<16 x i8> %s0, <16 x i8> %s1) { -; CHECK-LABEL: sub_zs: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v2.8h, v0.8b, #0 -; CHECK-NEXT: ushll2 v0.8h, v0.16b, #0 -; CHECK-NEXT: ssubw v2.8h, v2.8h, v1.8b -; CHECK-NEXT: ssubw2 v4.8h, v0.8h, v1.16b -; CHECK-NEXT: sshll v0.4s, v2.4h, #0 -; CHECK-NEXT: sshll2 v3.4s, v4.8h, #0 -; CHECK-NEXT: sshll2 v1.4s, v2.8h, #0 -; CHECK-NEXT: sshll v2.4s, v4.4h, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sub_zs: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ushll v2.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll2 v0.8h, v0.16b, #0 +; CHECK-SD-NEXT: ssubw v2.8h, v2.8h, v1.8b +; CHECK-SD-NEXT: ssubw2 v4.8h, v0.8h, v1.16b +; CHECK-SD-NEXT: sshll v0.4s, v2.4h, #0 +; CHECK-SD-NEXT: sshll2 v3.4s, v4.8h, #0 +; CHECK-SD-NEXT: sshll2 v1.4s, v2.8h, #0 +; CHECK-SD-NEXT: sshll v2.4s, v4.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sub_zs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0 +; CHECK-GI-NEXT: ushll2 v0.8h, v0.16b, #0 +; CHECK-GI-NEXT: sshll v3.8h, v1.8b, #0 +; CHECK-GI-NEXT: sshll2 v4.8h, v1.16b, #0 +; CHECK-GI-NEXT: ushll v1.4s, v2.4h, #0 +; CHECK-GI-NEXT: ushll2 v2.4s, v2.8h, #0 +; CHECK-GI-NEXT: ushll v5.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll2 v6.4s, v0.8h, #0 +; CHECK-GI-NEXT: ssubw v0.4s, v1.4s, v3.4h +; CHECK-GI-NEXT: ssubw2 v1.4s, v2.4s, v3.8h +; CHECK-GI-NEXT: ssubw v2.4s, v5.4s, v4.4h +; CHECK-GI-NEXT: ssubw2 v3.4s, v6.4s, v4.8h +; CHECK-GI-NEXT: ret entry: %s0s = zext <16 x 
i8> %s0 to <16 x i32> %s1s = sext <16 x i8> %s1 to <16 x i32> diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll index 866b27b81d885..c91de8f3a0a47 100644 --- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll +++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll @@ -8,36 +8,39 @@ define dso_local void @run_test() local_unnamed_addr uwtable { ; CHECK-LABEL: run_test: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #192 -; CHECK-NEXT: .cfi_def_cfa_offset 192 +; CHECK-NEXT: sub sp, sp, #208 +; CHECK-NEXT: .cfi_def_cfa_offset 208 ; CHECK-NEXT: stp d15, d14, [sp, #96] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #112] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #128] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #160] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #176] // 16-byte Folded Spill +; CHECK-NEXT: str x23, [sp, #160] // 8-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #176] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #192] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 ; CHECK-NEXT: .cfi_offset w22, -32 -; CHECK-NEXT: .cfi_offset b8, -40 -; CHECK-NEXT: .cfi_offset b9, -48 -; CHECK-NEXT: .cfi_offset b10, -56 -; CHECK-NEXT: .cfi_offset b11, -64 -; CHECK-NEXT: .cfi_offset b12, -72 -; CHECK-NEXT: .cfi_offset b13, -80 -; CHECK-NEXT: .cfi_offset b14, -88 -; CHECK-NEXT: .cfi_offset b15, -96 +; CHECK-NEXT: .cfi_offset w23, -48 +; CHECK-NEXT: .cfi_offset b8, -56 +; CHECK-NEXT: .cfi_offset b9, -64 +; CHECK-NEXT: .cfi_offset b10, -72 +; CHECK-NEXT: .cfi_offset b11, -80 +; CHECK-NEXT: .cfi_offset b12, -88 +; CHECK-NEXT: .cfi_offset b13, -96 +; CHECK-NEXT: .cfi_offset b14, -104 +; CHECK-NEXT: .cfi_offset b15, -112 ; CHECK-NEXT: movi v2.2d, #0000000000000000 ; 
CHECK-NEXT: // implicit-def: $q1 ; CHECK-NEXT: mov x8, xzr -; CHECK-NEXT: mov x9, xzr -; CHECK-NEXT: adrp x10, B+48 -; CHECK-NEXT: add x10, x10, :lo12:B+48 -; CHECK-NEXT: adrp x11, A -; CHECK-NEXT: add x11, x11, :lo12:A +; CHECK-NEXT: adrp x9, B+48 +; CHECK-NEXT: add x9, x9, :lo12:B+48 +; CHECK-NEXT: adrp x10, A +; CHECK-NEXT: add x10, x10, :lo12:A +; CHECK-NEXT: mov x11, xzr ; CHECK-NEXT: // kill: killed $q1 ; CHECK-NEXT: // implicit-def: $q1 +; CHECK-NEXT: mov x12, xzr ; CHECK-NEXT: // implicit-def: $q0 ; CHECK-NEXT: // implicit-def: $q3 ; CHECK-NEXT: // implicit-def: $q4 @@ -69,103 +72,102 @@ define dso_local void @run_test() local_unnamed_addr uwtable { ; CHECK-NEXT: // kill: killed $q1 ; CHECK-NEXT: .LBB0_1: // %for.cond1.preheader ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: str q14, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: ldr q14, [x8] -; CHECK-NEXT: mov x12, xzr -; CHECK-NEXT: ldr x14, [x12] ; CHECK-NEXT: stp q29, q15, [sp] // 32-byte Folded Spill -; CHECK-NEXT: add x19, x11, x8 -; CHECK-NEXT: fmov x15, d14 -; CHECK-NEXT: mov x16, v14.d[1] -; CHECK-NEXT: ldr q15, [x12] -; CHECK-NEXT: ldr q14, [x10], #64 +; CHECK-NEXT: ldr q15, [x8] +; CHECK-NEXT: ldr x15, [x8] +; CHECK-NEXT: str q14, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: add x20, x10, x11 ; CHECK-NEXT: mov v8.16b, v28.16b -; CHECK-NEXT: fmov x13, d15 -; CHECK-NEXT: mov x18, v15.d[1] +; CHECK-NEXT: fmov x2, d15 +; CHECK-NEXT: mov x17, v15.d[1] +; CHECK-NEXT: ldr q14, [x8] ; CHECK-NEXT: mov v28.16b, v24.16b -; CHECK-NEXT: mul x17, x15, x14 -; CHECK-NEXT: mov x12, v14.d[1] -; CHECK-NEXT: fmov x4, d14 ; CHECK-NEXT: mov v24.16b, v20.16b ; CHECK-NEXT: mov v20.16b, v17.16b +; CHECK-NEXT: fmov x13, d14 +; CHECK-NEXT: mov x16, v14.d[1] ; CHECK-NEXT: mov v17.16b, v5.16b -; CHECK-NEXT: mul x1, x16, x14 +; CHECK-NEXT: mul x3, x2, x15 +; CHECK-NEXT: ldr q14, [x9], #64 ; CHECK-NEXT: ldr q5, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x5, [x8] -; CHECK-NEXT: ldr x19, 
[x19, #128] +; CHECK-NEXT: ldr x6, [x8] +; CHECK-NEXT: ldr x20, [x20, #128] +; CHECK-NEXT: mul x1, x17, x15 +; CHECK-NEXT: mov x14, v14.d[1] +; CHECK-NEXT: fmov x5, d14 ; CHECK-NEXT: mov v29.16b, v21.16b ; CHECK-NEXT: mov v21.16b, v0.16b -; CHECK-NEXT: mul x0, x13, x14 ; CHECK-NEXT: mov v25.16b, v6.16b +; CHECK-NEXT: mul x18, x13, x15 ; CHECK-NEXT: mov v6.16b, v2.16b -; CHECK-NEXT: fmov d15, x17 ; CHECK-NEXT: mov v26.16b, v22.16b +; CHECK-NEXT: fmov d15, x3 ; CHECK-NEXT: mov v22.16b, v18.16b -; CHECK-NEXT: mul x2, x18, x14 ; CHECK-NEXT: mov v18.16b, v7.16b +; CHECK-NEXT: mul x0, x16, x15 ; CHECK-NEXT: mov v7.16b, v3.16b ; CHECK-NEXT: mov v16.16b, v4.16b -; CHECK-NEXT: add x8, x8, #8 -; CHECK-NEXT: add x9, x9, #1 +; CHECK-NEXT: add x11, x11, #8 +; CHECK-NEXT: add x12, x12, #1 ; CHECK-NEXT: mov v15.d[1], x1 -; CHECK-NEXT: mul x3, x12, x14 -; CHECK-NEXT: cmp x8, #64 -; CHECK-NEXT: fmov d14, x0 -; CHECK-NEXT: mul x14, x4, x14 +; CHECK-NEXT: mul x4, x14, x15 +; CHECK-NEXT: cmp x11, #64 +; CHECK-NEXT: fmov d14, x18 +; CHECK-NEXT: mul x15, x5, x15 ; CHECK-NEXT: add v5.2d, v5.2d, v15.2d -; CHECK-NEXT: mul x20, x15, x5 -; CHECK-NEXT: mov v14.d[1], x2 -; CHECK-NEXT: mul x15, x15, x19 -; CHECK-NEXT: fmov d0, x14 +; CHECK-NEXT: mul x21, x2, x6 +; CHECK-NEXT: mov v14.d[1], x0 +; CHECK-NEXT: mul x2, x2, x20 +; CHECK-NEXT: fmov d0, x15 ; CHECK-NEXT: str q5, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: ldr q5, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mul x21, x13, x19 +; CHECK-NEXT: mul x22, x13, x20 ; CHECK-NEXT: add v5.2d, v5.2d, v14.2d -; CHECK-NEXT: fmov d3, x20 -; CHECK-NEXT: mul x7, x16, x5 -; CHECK-NEXT: mov v0.d[1], x3 -; CHECK-NEXT: fmov d1, x15 -; CHECK-NEXT: mul x16, x16, x19 +; CHECK-NEXT: fmov d3, x21 +; CHECK-NEXT: mul x19, x17, x6 +; CHECK-NEXT: mov v0.d[1], x4 +; CHECK-NEXT: fmov d1, x2 +; CHECK-NEXT: mul x17, x17, x20 ; CHECK-NEXT: str q5, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: add v5.2d, v13.2d, v14.2d -; CHECK-NEXT: fmov d2, x21 +; 
CHECK-NEXT: fmov d2, x22 ; CHECK-NEXT: ldr q13, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: mul x6, x18, x5 +; CHECK-NEXT: mul x7, x16, x6 ; CHECK-NEXT: ldp q15, q14, [sp, #16] // 32-byte Folded Reload -; CHECK-NEXT: mov v3.d[1], x7 +; CHECK-NEXT: mov v3.d[1], x19 ; CHECK-NEXT: add v13.2d, v13.2d, v0.2d -; CHECK-NEXT: mul x18, x18, x19 -; CHECK-NEXT: mov v1.d[1], x16 -; CHECK-NEXT: mul x22, x4, x19 +; CHECK-NEXT: mul x16, x16, x20 +; CHECK-NEXT: mov v1.d[1], x17 +; CHECK-NEXT: mul x23, x5, x20 ; CHECK-NEXT: str q13, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov v13.16b, v5.16b ; CHECK-NEXT: mov v5.16b, v17.16b ; CHECK-NEXT: mov v17.16b, v20.16b ; CHECK-NEXT: mov v20.16b, v24.16b -; CHECK-NEXT: mul x13, x13, x5 +; CHECK-NEXT: mul x13, x13, x6 ; CHECK-NEXT: mov v24.16b, v28.16b ; CHECK-NEXT: add v11.2d, v11.2d, v3.2d -; CHECK-NEXT: mov v2.d[1], x18 +; CHECK-NEXT: mov v2.d[1], x16 ; CHECK-NEXT: add v15.2d, v15.2d, v1.2d ; CHECK-NEXT: add v27.2d, v27.2d, v3.2d -; CHECK-NEXT: mul x17, x12, x19 +; CHECK-NEXT: mul x18, x14, x20 ; CHECK-NEXT: add v23.2d, v23.2d, v3.2d ; CHECK-NEXT: add v19.2d, v19.2d, v3.2d -; CHECK-NEXT: fmov d4, x22 +; CHECK-NEXT: fmov d4, x23 ; CHECK-NEXT: add v10.2d, v10.2d, v3.2d -; CHECK-NEXT: mul x14, x4, x5 +; CHECK-NEXT: mul x15, x5, x6 ; CHECK-NEXT: fmov d0, x13 ; CHECK-NEXT: add v14.2d, v14.2d, v2.2d ; CHECK-NEXT: add v2.2d, v6.2d, v3.2d -; CHECK-NEXT: mul x12, x12, x5 +; CHECK-NEXT: mul x14, x14, x6 ; CHECK-NEXT: mov v3.16b, v7.16b ; CHECK-NEXT: mov v7.16b, v18.16b -; CHECK-NEXT: mov v4.d[1], x17 +; CHECK-NEXT: mov v4.d[1], x18 ; CHECK-NEXT: mov v18.16b, v22.16b -; CHECK-NEXT: mov v0.d[1], x6 -; CHECK-NEXT: fmov d1, x14 +; CHECK-NEXT: mov v0.d[1], x7 +; CHECK-NEXT: fmov d1, x15 ; CHECK-NEXT: add v28.2d, v8.2d, v4.2d -; CHECK-NEXT: mov v1.d[1], x12 +; CHECK-NEXT: mov v1.d[1], x14 ; CHECK-NEXT: add v31.2d, v31.2d, v0.2d ; CHECK-NEXT: add v30.2d, v30.2d, v0.2d ; CHECK-NEXT: add v12.2d, v12.2d, v0.2d @@ -192,11 +194,12 @@ define 
dso_local void @run_test() local_unnamed_addr uwtable { ; CHECK-NEXT: adrp x8, C ; CHECK-NEXT: add x8, x8, :lo12:C ; CHECK-NEXT: stp q11, q30, [x8, #80] -; CHECK-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #192] // 16-byte Folded Reload ; CHECK-NEXT: str q1, [x8] ; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr x23, [sp, #160] // 8-byte Folded Reload ; CHECK-NEXT: stp q15, q14, [x8, #144] -; CHECK-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #176] // 16-byte Folded Reload ; CHECK-NEXT: stp q1, q13, [x8, #16] ; CHECK-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: stp q28, q12, [x8, #176] @@ -216,12 +219,13 @@ define dso_local void @run_test() local_unnamed_addr uwtable { ; CHECK-NEXT: stp q5, q4, [x8, #432] ; CHECK-NEXT: stp q2, q3, [x8, #464] ; CHECK-NEXT: str q0, [x8, #496] -; CHECK-NEXT: add sp, sp, #192 +; CHECK-NEXT: add sp, sp, #208 ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w19 ; CHECK-NEXT: .cfi_restore w20 ; CHECK-NEXT: .cfi_restore w21 ; CHECK-NEXT: .cfi_restore w22 +; CHECK-NEXT: .cfi_restore w23 ; CHECK-NEXT: .cfi_restore b8 ; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: .cfi_restore b10 diff --git a/llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll b/llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll index 95abbb6979be8..af664549a472a 100644 --- a/llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll +++ b/llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll @@ -91,4 +91,94 @@ define void @atomic_store_relaxed_f64(ptr %p, i32 %off32, i64 %off64, double %va ret void } +define half @atomic_load_relaxed_f16(ptr %p, i32 %off32, i64 %off64) #0 { +; CHECK-LABEL: atomic_load_relaxed_f16: + %ptr_unsigned = getelementptr half, ptr %p, i32 4095 + %val_unsigned = load atomic half, ptr %ptr_unsigned monotonic, align 4 +; CHECK: ldrh {{w[0-9]+}}, [x0, #8190] + + %ptr_regoff = getelementptr half, ptr %p, i32 %off32 + %val_regoff = 
load atomic half, ptr %ptr_regoff unordered, align 4 + %tot1 = fadd half %val_unsigned, %val_regoff +; CHECK: ldrh {{w[0-9]+}}, [x0, w1, sxtw #1] + + %ptr_regoff64 = getelementptr half, ptr %p, i64 %off64 + %val_regoff64 = load atomic half, ptr %ptr_regoff64 monotonic, align 4 + %tot2 = fadd half %tot1, %val_regoff64 +; CHECK: ldrh {{w[0-9]+}}, [x0, x2, lsl #1] + + %ptr_unscaled = getelementptr half, ptr %p, i32 -64 + %val_unscaled = load atomic half, ptr %ptr_unscaled unordered, align 4 + %tot3 = fadd half %tot2, %val_unscaled +; CHECK: ldurh {{w[0-9]+}}, [x0, #-128] + + ret half %tot3 +} + +define bfloat @atomic_load_relaxed_bf16(ptr %p, i32 %off32, i64 %off64) #0 { +; CHECK-LABEL: atomic_load_relaxed_bf16: + %ptr_unsigned = getelementptr bfloat, ptr %p, i32 4095 + %val_unsigned = load atomic bfloat, ptr %ptr_unsigned monotonic, align 4 +; CHECK: ldrh {{w[0-9]+}}, [x0, #8190] + + %ptr_regoff = getelementptr bfloat, ptr %p, i32 %off32 + %val_regoff = load atomic bfloat, ptr %ptr_regoff unordered, align 4 + %tot1 = fadd bfloat %val_unsigned, %val_regoff +; CHECK: ldrh {{w[0-9]+}}, [x0, w1, sxtw #1] + + %ptr_regoff64 = getelementptr bfloat, ptr %p, i64 %off64 + %val_regoff64 = load atomic bfloat, ptr %ptr_regoff64 monotonic, align 4 + %tot2 = fadd bfloat %tot1, %val_regoff64 +; CHECK: ldrh {{w[0-9]+}}, [x0, x2, lsl #1] + + %ptr_unscaled = getelementptr bfloat, ptr %p, i32 -64 + %val_unscaled = load atomic bfloat, ptr %ptr_unscaled unordered, align 4 + %tot3 = fadd bfloat %tot2, %val_unscaled +; CHECK: ldurh {{w[0-9]+}}, [x0, #-128] + + ret bfloat %tot3 +} + +define void @atomic_store_relaxed_f16(ptr %p, i32 %off32, i64 %off64, half %val) #0 { +; CHECK-LABEL: atomic_store_relaxed_f16: + %ptr_unsigned = getelementptr half, ptr %p, i32 4095 + store atomic half %val, ptr %ptr_unsigned monotonic, align 4 +; CHECK: strh {{w[0-9]+}}, [x0, #8190] + + %ptr_regoff = getelementptr half, ptr %p, i32 %off32 + store atomic half %val, ptr %ptr_regoff unordered, align 4 +; CHECK: 
strh {{w[0-9]+}}, [x0, w1, sxtw #1] + + %ptr_regoff64 = getelementptr half, ptr %p, i64 %off64 + store atomic half %val, ptr %ptr_regoff64 monotonic, align 4 +; CHECK: strh {{w[0-9]+}}, [x0, x2, lsl #1] + + %ptr_unscaled = getelementptr half, ptr %p, i32 -64 + store atomic half %val, ptr %ptr_unscaled unordered, align 4 +; CHECK: sturh {{w[0-9]+}}, [x0, #-128] + + ret void +} + +define void @atomic_store_relaxed_bf16(ptr %p, i32 %off32, i64 %off64, bfloat %val) #0 { +; CHECK-LABEL: atomic_store_relaxed_bf16: + %ptr_unsigned = getelementptr bfloat, ptr %p, i32 4095 + store atomic bfloat %val, ptr %ptr_unsigned monotonic, align 4 +; CHECK: strh {{w[0-9]+}}, [x0, #8190] + + %ptr_regoff = getelementptr bfloat, ptr %p, i32 %off32 + store atomic bfloat %val, ptr %ptr_regoff unordered, align 4 +; CHECK: strh {{w[0-9]+}}, [x0, w1, sxtw #1] + + %ptr_regoff64 = getelementptr bfloat, ptr %p, i64 %off64 + store atomic bfloat %val, ptr %ptr_regoff64 monotonic, align 4 +; CHECK: strh {{w[0-9]+}}, [x0, x2, lsl #1] + + %ptr_unscaled = getelementptr bfloat, ptr %p, i32 -64 + store atomic bfloat %val, ptr %ptr_unscaled unordered, align 4 +; CHECK: sturh {{w[0-9]+}}, [x0, #-128] + + ret void +} + attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll index bc1db878cbd31..611cdcda157e2 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme-f16f16 -verify-machineinstrs < %s | FileCheck %s ; ; FCVT @@ -139,6 +139,15 @@ define {, ,, , ,, } %res } +define {, } @multi_vector_cvt_widen_x2_f16( %zn0) { +; CHECK-LABEL: multi_vector_cvt_widen_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvt { z0.s, z1.s }, z0.h +; 
CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32( %zn0) + ret {, } %res +} + declare @llvm.aarch64.sve.fcvt.x2.nxv4f32(, ) declare @llvm.aarch64.sve.bfcvt.x2(, ) declare {, } @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32(,) diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtl.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtl.ll new file mode 100644 index 0000000000000..30dc7cbfaea6c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtl.ll @@ -0,0 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme-f16f16 -verify-machineinstrs < %s | FileCheck %s + +define {, } @multi_vector_cvtl_widen_x2_f16( %zn0) { +; CHECK-LABEL: multi_vector_cvtl_widen_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl { z0.s, z1.s }, z0.h +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fcvtl.widen.x2.nxv4f32( %zn0) + ret {, } %res +} diff --git a/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll b/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll new file mode 100644 index 0000000000000..ba77637580f4c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll @@ -0,0 +1,190 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -verify-machineinstrs < %s | FileCheck %s + +target triple = "aarch64-linux" + +define void @test_svzero_za64_vg1x2(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg1x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 0, vgx2] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 %slice) + ret void +} + +define void @test_svzero_za64_vg1x2_offset(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg1x2_offset: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 7, vgx2] +; CHECK-NEXT: ret +entry: + %slice.max = add i32 %slice, 7 + tail call void 
@llvm.aarch64.sme.zero.za64.vg1x2(i32 %slice.max) + ret void +} + +define void @test_svzero_za64_vg1x4(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg1x4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 0, vgx4] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.zero.za64.vg1x4(i32 %slice) + ret void +} + +define void @test_svzero_za64_vg1x4_offset(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg1x4_offset: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 1, vgx4] +; CHECK-NEXT: ret +entry: + %slice.min = add i32 %slice, 1 + tail call void @llvm.aarch64.sme.zero.za64.vg1x4(i32 %slice.min) + ret void +} + +define void @test_svzero_za64_vg2x1(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg2x1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 0:1] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.zero.za64.vg2x1(i32 %slice) + ret void +} + +define void @test_svzero_za64_vg2x1_offset(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg2x1_offset: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 6:7] +; CHECK-NEXT: ret +entry: + %slice.max = add i32 %slice, 6 + tail call void @llvm.aarch64.sme.zero.za64.vg2x1(i32 %slice.max) + ret void +} + +define void @test_svzero_za64_vg2x2(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg2x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 0:1, vgx2] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.zero.za64.vg2x2(i32 %slice) + ret void +} + +define void @test_svzero_za64_vg2x2_offset(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg2x2_offset: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 2:3, vgx2] +; CHECK-NEXT: ret +entry: + %slice.max = add i32 %slice, 2 + tail call void @llvm.aarch64.sme.zero.za64.vg2x2(i32 %slice.max) + ret void +} + +define void 
@test_svzero_za64_vg2x4(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg2x4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 0:1, vgx4] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.zero.za64.vg2x4(i32 %slice) + ret void +} + +define void @test_svzero_za64_vg2x4_offset(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg2x4_offset: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add w8, w0, #1 +; CHECK-NEXT: zero za.d[w8, 0:1, vgx4] +; CHECK-NEXT: ret +entry: + %slice.min = add i32 %slice, 1 + tail call void @llvm.aarch64.sme.zero.za64.vg2x4(i32 %slice.min) + ret void +} + +define void @test_svzero_za64_vg4x1(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg4x1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 0:3] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.zero.za64.vg4x1(i32 %slice) + ret void +} + +define void @test_svzero_za64_vg4x1_offset(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg4x1_offset: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 4:7] +; CHECK-NEXT: ret +entry: + %slice.max = add i32 %slice, 4 + tail call void @llvm.aarch64.sme.zero.za64.vg4x1(i32 %slice.max) + ret void +} + +define void @test_svzero_za64_vg4x2(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg4x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 0:3, vgx2] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.zero.za64.vg4x2(i32 %slice) + ret void +} + +define void @test_svzero_za64_vg4x2_offset(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg4x2_offset: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 0:3, vgx2] +; CHECK-NEXT: ret +entry: + %slice.max = add i32 %slice, 0 + tail call void @llvm.aarch64.sme.zero.za64.vg4x2(i32 %slice.max) + ret void +} + +define void @test_svzero_za64_vg4x4(i32 %slice) #0 { +; CHECK-LABEL: 
test_svzero_za64_vg4x4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: zero za.d[w8, 0:3, vgx4] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.zero.za64.vg4x4(i32 %slice) + ret void +} + +define void @test_svzero_za64_vg4x4_offset(i32 %slice) #0 { +; CHECK-LABEL: test_svzero_za64_vg4x4_offset: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add w8, w0, #1 +; CHECK-NEXT: zero za.d[w8, 0:3, vgx4] +; CHECK-NEXT: ret +entry: + %slice.min = add i32 %slice, 1 + tail call void @llvm.aarch64.sme.zero.za64.vg4x4(i32 %slice.min) + ret void +} + +attributes #0 = { nounwind "target-features" = "+sme2p1"} diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll index e843537c10a33..4cdb175f55c9c 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -14,6 +15,19 @@ define <4 x i8> @vls_sve_and_4xi8(<4 x i8> %b) nounwind { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_4xi8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: strh wzr, [sp, #12] +; NONEON-NOSVE-NEXT: strh wzr, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %c = and <4 x i8> %b, ret <4 x i8> %c } @@ -27,6 +41,25 @@ define <8 x i8> @vls_sve_and_8xi8(<8 x i8> %b) nounwind { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_8xi8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #14] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #15] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #9] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %c = and <8 x i8> %b, ret <8 x i8> %c } @@ -40,6 +73,37 @@ define <16 x i8> @vls_sve_and_16xi8(<16 x i8> %b) nounwind { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_16xi8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #30] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #28] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #26] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #24] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #22] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #20] +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #18] +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %c = and <16 x i8> %b, ret <16 x i8> %c } @@ -56,6 +120,61 @@ define <32 x i8> @vls_sve_and_32xi8(<32 x i8> %ap) nounwind { ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_32xi8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #46] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #62] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #60] +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #58] +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #56] +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #54] +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #52] +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #50] +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #48] +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; 
NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %b = and <32 x i8> %ap, ret <32 x i8> %b @@ -73,6 +192,15 @@ define <2 x i16> @vls_sve_and_2xi16(<2 x i16> %b) nounwind { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_2xi16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %c = and <2 x i16> %b, ret <2 x i16> %c } @@ -86,6 +214,19 @@ define <4 x i16> @vls_sve_and_4xi16(<4 x i16> %b) nounwind { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_4xi16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: strh wzr, [sp, #12] +; NONEON-NOSVE-NEXT: strh wzr, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %c = and <4 x i16> %b, ret <4 x i16> %c } @@ -99,6 +240,25 @@ define <8 x i16> @vls_sve_and_8xi16(<8 x i16> %b) nounwind { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_8xi16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: strh wzr, [sp, #28] +; NONEON-NOSVE-NEXT: strh wzr, [sp, #24] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: strh wzr, [sp, #20] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: strh wzr, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %c = and <8 x i16> %b, ret <8 x i16> %c } @@ -115,6 +275,37 @@ define <16 x i16> @vls_sve_and_16xi16(<16 x i16> %b) nounwind { ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_16xi16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-64]! +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: strh wzr, [sp, #44] +; NONEON-NOSVE-NEXT: strh wzr, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: strh wzr, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: strh wzr, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: strh wzr, [sp, #60] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: strh wzr, [sp, #56] +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: strh wzr, [sp, #52] +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: strh wzr, [sp, #48] +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, 
#32] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %c = and <16 x i16> %b, ret <16 x i16> %c } @@ -128,6 +319,15 @@ define <2 x i32> @vls_sve_and_2xi32(<2 x i32> %b) nounwind { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_2xi32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %c = and <2 x i32> %b, ret <2 x i32> %c } @@ -141,6 +341,17 @@ define <4 x i32> @vls_sve_and_4xi32(<4 x i32> %b) nounwind { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_4xi32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %c = and <4 x i32> %b, ret <4 x i32> %c } @@ -157,6 +368,21 @@ define <8 x i32> @vls_sve_and_8xi32(<8 x i32> %b) nounwind { ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_8xi32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %c = and <8 x i32> %b, ret <8 x i32> %c } @@ -170,6 +396,15 @@ define <2 x i64> @vls_sve_and_2xi64(<2 x i64> %b) nounwind { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_2xi64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: stp xzr, x8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %c = and <2 x i64> %b, ret <2 x i64> %c } @@ -185,6 +420,20 @@ define <4 x i64> @vls_sve_and_4xi64(<4 x i64> %b) nounwind { ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_4xi64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: str q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #40] +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: stp xzr, x8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: stp xzr, x8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %c = and <4 x i64> %b, ret <4 x i64> %c } diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll index aa42d5c2a8c13..f920efeb4892d 100644 --- 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -18,6 +19,30 @@ define <4 x i8> @ctlz_v4i8(<4 x i8> %op) { ; CHECK-NEXT: sub z0.h, z0.h, #8 // =0x8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #2] +; NONEON-NOSVE-NEXT: ldrb w11, [sp] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: clz w9, w9 +; NONEON-NOSVE-NEXT: clz w10, w10 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: sub w9, w9, #24 +; NONEON-NOSVE-NEXT: sub w10, w10, #24 +; NONEON-NOSVE-NEXT: strh w8, [sp, #14] +; NONEON-NOSVE-NEXT: clz w8, w11 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w10, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> %op) ret <4 x i8> %res } @@ -30,6 +55,46 @@ define <8 x i8> @ctlz_v8i8(<8 x i8> %op) { ; CHECK-NEXT: clz z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #15] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #9] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %op) ret <8 x i8> %res } @@ -42,6 +107,78 @@ define <16 x i8> @ctlz_v16i8(<16 x i8> %op) { ; CHECK-NEXT: clz z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %op) ret <16 x i8> %res } @@ -55,6 +192,144 @@ define void @ctlz_v32i8(ptr %a) { ; CHECK-NEXT: clz z1.b, p0/m, z1.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: clz w8, w8 +; 
NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: clz w8, w8 +; 
NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; 
NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %op) store <32 x i8> %res, ptr %a @@ -71,6 +346,21 @@ define <2 x i16> @ctlz_v2i16(<2 x i16> %op) { ; CHECK-NEXT: sub z0.s, z0.s, #16 // =0x10 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: clz w9, w9 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: sub w9, w9, #16 +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> %op) ret <2 x i16> %res } @@ -83,6 +373,30 @@ define <4 x i16> @ctlz_v4i16(<4 x i16> %op) { ; CHECK-NEXT: clz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %op) ret <4 x i16> %res } @@ -95,6 +409,46 @@ define <8 x i16> @ctlz_v8i16(<8 x i16> %op) { ; CHECK-NEXT: clz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %op) ret <8 x i16> %res } @@ -108,6 +462,80 @@ define void @ctlz_v16i16(ptr %a) { ; CHECK-NEXT: clz z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; 
NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: sub w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %op) store <16 x i16> %res, ptr %a @@ -122,6 +550,19 @@ define <2 x i32> @ctlz_v2i32(<2 x i32> %op) { ; CHECK-NEXT: clz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: clz w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %op) ret <2 x i32> %res } @@ -134,6 +575,24 @@ define <4 x i32> @ctlz_v4i32(<4 x i32> %op) { ; CHECK-NEXT: clz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: clz w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: clz w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %op) ret <4 x i32> %res } @@ -147,6 +606,36 @@ define void @ctlz_v8i32(ptr %a) { ; CHECK-NEXT: clz z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: clz w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: clz w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: clz w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: clz w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %op) store <8 x i32> %res, ptr %a @@ -161,6 +650,17 @@ define <1 x i64> @ctlz_v1i64(<1 x 
i64> %op) { ; CHECK-NEXT: clz z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: clz x8, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %op) ret <1 x i64> %res } @@ -173,6 +673,19 @@ define <2 x i64> @ctlz_v2i64(<2 x i64> %op) { ; CHECK-NEXT: clz z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: clz x9, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: clz x8, x8 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %op) ret <2 x i64> %res } @@ -186,6 +699,26 @@ define void @ctlz_v4i64(ptr %a) { ; CHECK-NEXT: clz z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: clz x9, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: clz x8, x8 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: clz x9, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: clz x8, x8 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %op) store <4 x i64> %res, ptr %a @@ -205,6 +738,41 @@ define <4 x i8> @ctpop_v4i8(<4 x i8> %op) { ; CHECK-NEXT: cnt z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: str d0, [sp, #64] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #66] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: fmov d1, x9 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #64] +; NONEON-NOSVE-NEXT: fmov d2, x10 +; NONEON-NOSVE-NEXT: fmov d3, x8 +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: cnt v1.8b, v1.8b +; NONEON-NOSVE-NEXT: cnt v2.8b, v2.8b +; NONEON-NOSVE-NEXT: cnt v3.8b, v3.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h1, v1.8b +; NONEON-NOSVE-NEXT: uaddlv h2, v2.8b +; NONEON-NOSVE-NEXT: uaddlv h3, v3.8b +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: stp q3, q2, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: 
ldr w8, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #72] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %op) ret <4 x i8> %res } @@ -217,6 +785,71 @@ define <8 x i8> @ctpop_v8i8(<8 x i8> %op) { ; CHECK-NEXT: cnt z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #144 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 144 +; NONEON-NOSVE-NEXT: str d0, [sp, #128] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #135] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #134] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #112] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #133] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #96] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #132] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #80] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #131] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #64] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #130] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #48] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #129] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #32] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #128] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; 
NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] +; NONEON-NOSVE-NEXT: strb w8, [sp, #143] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: strb w8, [sp, #142] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] +; NONEON-NOSVE-NEXT: strb w8, [sp, #141] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: strb w8, [sp, #140] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: strb w8, [sp, #139] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #138] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, #137] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #136] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #136] +; NONEON-NOSVE-NEXT: add sp, sp, #144 +; NONEON-NOSVE-NEXT: ret %res = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %op) ret <8 x i8> %res } @@ -229,6 +862,130 @@ define <16 x i8> @ctpop_v16i8(<16 x i8> %op) { ; CHECK-NEXT: cnt z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #304 +; NONEON-NOSVE-NEXT: str x29, [sp, #288] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 304 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: str q0, [sp, #256] +; NONEON-NOSVE-NEXT: ldr x29, [sp, #288] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #271] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #270] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #240] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #269] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #224] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; 
NONEON-NOSVE-NEXT: ldrb w8, [sp, #268] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #208] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #267] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #192] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #266] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #176] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #265] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #160] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #264] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #144] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #263] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #128] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #262] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #112] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #261] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #96] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #260] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #80] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #259] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #64] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #258] +; 
NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #48] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #257] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #32] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #256] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #240] +; NONEON-NOSVE-NEXT: strb w8, [sp, #287] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #224] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: strb w8, [sp, #286] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #208] +; NONEON-NOSVE-NEXT: strb w8, [sp, #285] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #192] +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: strb w8, [sp, #284] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #176] +; NONEON-NOSVE-NEXT: strb w8, [sp, #283] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #160] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #282] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #144] +; NONEON-NOSVE-NEXT: strb w8, [sp, #281] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] +; NONEON-NOSVE-NEXT: strb w8, [sp, #280] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] +; NONEON-NOSVE-NEXT: strb w8, [sp, #279] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] +; NONEON-NOSVE-NEXT: strb w8, [sp, #278] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] +; NONEON-NOSVE-NEXT: strb w8, [sp, #277] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] +; NONEON-NOSVE-NEXT: strb w8, [sp, #276] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: strb w8, [sp, #275] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #274] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, #273] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #272] +; NONEON-NOSVE-NEXT: ldr q0, 
[sp, #272] +; NONEON-NOSVE-NEXT: add sp, sp, #304 +; NONEON-NOSVE-NEXT: ret %res = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %op) ret <16 x i8> %res } @@ -242,6 +999,244 @@ define void @ctpop_v32i8(ptr %a) { ; CHECK-NEXT: cnt z1.b, p0/m, z1.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: sub sp, sp, #576 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 592 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #512] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #543] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #542] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #240] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #541] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #224] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #540] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #208] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #539] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #192] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #538] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #176] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #537] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #160] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #536] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, 
v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #144] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #535] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #128] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #534] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #112] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #533] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #96] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #532] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #80] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #531] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #64] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #530] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #48] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #529] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #32] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #528] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #527] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #526] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #496] +; 
NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #525] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #480] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #524] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #464] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #523] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #448] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #522] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #432] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #521] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #416] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #520] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #400] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #519] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #384] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #518] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #368] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #517] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #352] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #516] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #336] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; 
NONEON-NOSVE-NEXT: ldrb w8, [sp, #515] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #320] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #514] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #304] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #513] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #288] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #512] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #272] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #240] +; NONEON-NOSVE-NEXT: strb w8, [sp, #575] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #224] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: strb w8, [sp, #574] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #208] +; NONEON-NOSVE-NEXT: strb w8, [sp, #573] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #192] +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: strb w8, [sp, #572] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #176] +; NONEON-NOSVE-NEXT: strb w8, [sp, #571] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #160] +; NONEON-NOSVE-NEXT: str q0, [sp, #256] +; NONEON-NOSVE-NEXT: strb w8, [sp, #570] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #144] +; NONEON-NOSVE-NEXT: strb w8, [sp, #569] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] +; NONEON-NOSVE-NEXT: strb w8, [sp, #568] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] +; NONEON-NOSVE-NEXT: strb w8, [sp, #567] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] +; NONEON-NOSVE-NEXT: strb w8, [sp, #566] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] +; NONEON-NOSVE-NEXT: strb w8, [sp, #565] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] +; NONEON-NOSVE-NEXT: strb w8, [sp, #564] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: strb w8, [sp, #563] +; NONEON-NOSVE-NEXT: ldr 
w8, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #562] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, #561] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #560] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #496] +; NONEON-NOSVE-NEXT: strb w8, [sp, #559] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #480] +; NONEON-NOSVE-NEXT: strb w8, [sp, #558] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #464] +; NONEON-NOSVE-NEXT: strb w8, [sp, #557] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #448] +; NONEON-NOSVE-NEXT: strb w8, [sp, #556] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #432] +; NONEON-NOSVE-NEXT: strb w8, [sp, #555] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #416] +; NONEON-NOSVE-NEXT: strb w8, [sp, #554] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #400] +; NONEON-NOSVE-NEXT: strb w8, [sp, #553] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #384] +; NONEON-NOSVE-NEXT: strb w8, [sp, #552] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #368] +; NONEON-NOSVE-NEXT: strb w8, [sp, #551] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #352] +; NONEON-NOSVE-NEXT: strb w8, [sp, #550] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #336] +; NONEON-NOSVE-NEXT: strb w8, [sp, #549] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #320] +; NONEON-NOSVE-NEXT: strb w8, [sp, #548] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #304] +; NONEON-NOSVE-NEXT: strb w8, [sp, #547] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #288] +; NONEON-NOSVE-NEXT: strb w8, [sp, #546] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #272] +; NONEON-NOSVE-NEXT: strb w8, [sp, #545] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #256] +; NONEON-NOSVE-NEXT: strb w8, [sp, #544] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #544] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #576 +; NONEON-NOSVE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %op) store <32 x i8> %res, ptr %a @@ -257,6 +1252,27 @@ define <2 x i16> @ctpop_v2i16(<2 x i16> %op) { ; CHECK-NEXT: cnt z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: 
def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: str d0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: fmov d1, x9 +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: cnt v1.8b, v1.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h1, v1.8b +; NONEON-NOSVE-NEXT: stp q1, q0, [sp] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %op) ret <2 x i16> %res } @@ -269,6 +1285,43 @@ define <4 x i16> @ctpop_v4i16(<4 x i16> %op) { ; CHECK-NEXT: cnt z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: str d0, [sp, #64] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #68] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #48] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #66] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #32] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #64] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; 
NONEON-NOSVE-NEXT: ldr w8, [sp, #32] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #72] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %op) ret <4 x i16> %res } @@ -281,6 +1334,71 @@ define <8 x i16> @ctpop_v8i16(<8 x i16> %op) { ; CHECK-NEXT: cnt z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #160 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: str q0, [sp, #128] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #142] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #140] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #112] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #138] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #96] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #136] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #80] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #134] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #64] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #132] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #48] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #130] +; 
NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #32] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #128] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] +; NONEON-NOSVE-NEXT: strh w8, [sp, #158] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: strh w8, [sp, #156] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] +; NONEON-NOSVE-NEXT: strh w8, [sp, #154] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: strh w8, [sp, #152] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: strh w8, [sp, #150] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #148] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #146] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #144] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #144] +; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: ret %res = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %op) ret <8 x i16> %res } @@ -294,6 +1412,132 @@ define void @ctpop_v16i16(ptr %a) { ; CHECK-NEXT: cnt z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #336 +; NONEON-NOSVE-NEXT: str x29, [sp, #320] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 336 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldr x29, [sp, #320] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #256] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #286] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #284] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b 
+; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #112] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #282] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #96] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #280] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #80] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #278] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #64] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #276] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #48] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #274] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #32] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #272] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #270] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #268] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #240] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #266] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #224] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #264] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: 
str q0, [sp, #208] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #262] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #192] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #260] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #176] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #258] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #160] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #256] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #144] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] +; NONEON-NOSVE-NEXT: strh w8, [sp, #318] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: strh w8, [sp, #316] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] +; NONEON-NOSVE-NEXT: strh w8, [sp, #314] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: strh w8, [sp, #312] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: strh w8, [sp, #310] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [sp, #128] +; NONEON-NOSVE-NEXT: strh w8, [sp, #308] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #306] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #304] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #240] +; NONEON-NOSVE-NEXT: strh w8, [sp, #302] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #224] +; NONEON-NOSVE-NEXT: strh w8, [sp, #300] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #208] +; NONEON-NOSVE-NEXT: strh w8, [sp, #298] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #192] +; NONEON-NOSVE-NEXT: strh w8, [sp, #296] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #176] +; NONEON-NOSVE-NEXT: 
strh w8, [sp, #294] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #160] +; NONEON-NOSVE-NEXT: strh w8, [sp, #292] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #144] +; NONEON-NOSVE-NEXT: strh w8, [sp, #290] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] +; NONEON-NOSVE-NEXT: strh w8, [sp, #288] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #288] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #336 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %op) store <16 x i16> %res, ptr %a @@ -308,6 +1552,28 @@ define <2 x i32> @ctpop_v2i32(<2 x i32> %op) { ; CHECK-NEXT: cnt z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: str d0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %op) ret <2 x i32> %res } @@ -320,6 +1586,41 @@ define <4 x i32> @ctpop_v4i32(<4 x i32> %op) { ; CHECK-NEXT: cnt z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: str q0, [sp, #64] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] +; 
NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #48] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #48] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #32] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #80] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %op) ret <4 x i32> %res } @@ -333,6 +1634,69 @@ define void @ctpop_v8i32(ptr %a) { ; CHECK-NEXT: cnt z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #192 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 192 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #128] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #156] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #152] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #48] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #148] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #48] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #32] +; 
NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #144] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #112] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #96] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #80] +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #184] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #176] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #112] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #168] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #80] +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #64] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #160] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #192 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %op) store <8 x i32> %res, ptr %a @@ -347,6 +1711,19 @@ define <1 x i64> @ctpop_v1i64(<1 x i64> %op) { ; CHECK-NEXT: cnt z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: 
ret +; +; NONEON-NOSVE-LABEL: ctpop_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.ctpop.v1i64(<1 x i64> %op) ret <1 x i64> %res } @@ -359,6 +1736,27 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %op) { ; CHECK-NEXT: cnt z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: str q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: str x8, [sp, #56] +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: str x8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %op) ret <2 x i64> %res } @@ -372,6 +1770,41 @@ define void @ctpop_v4i64(ptr %a) { ; CHECK-NEXT: cnt z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #128 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #88] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; 
NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: str x8, [sp, #120] +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: str x8, [sp, #112] +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #64] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: str x8, [sp, #104] +; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b +; NONEON-NOSVE-NEXT: str q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] +; NONEON-NOSVE-NEXT: str x8, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %op) store <4 x i64> %res, ptr %a @@ -392,6 +1825,34 @@ define <4 x i8> @cttz_v4i8(<4 x i8> %op) { ; CHECK-NEXT: clz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %op) ret <4 x i8> %res } @@ -405,6 +1866,54 @@ define <8 x i8> @cttz_v8i8(<8 x i8> %op) { ; CHECK-NEXT: clz z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #15] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #9] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %op) ret <8 x i8> %res } @@ -418,6 +1927,94 @@ define <16 x i8> @cttz_v16i8(<16 x i8> %op) { ; CHECK-NEXT: clz z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, 
#-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; 
NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %op) ret <16 x i8> %res } @@ -433,6 +2030,176 @@ define void @cttz_v32i8(ptr %a) { ; CHECK-NEXT: clz z1.b, p0/m, z1.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; 
NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz 
w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: 
orr w8, w8, #0x100 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %op) store <32 x i8> %res, ptr %a @@ -449,6 +2216,23 @@ define <2 x i16> @cttz_v2i16(<2 x i16> %op) { ; CHECK-NEXT: clz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %op) ret <2 x i16> %res } @@ -462,6 +2246,34 @@ define <4 x i16> @cttz_v4i16(<4 x i16> %op) { ; CHECK-NEXT: clz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %op) ret <4 x i16> %res } @@ -475,6 +2287,54 @@ define <8 x i16> @cttz_v8i16(<8 x i16> %op) { ; CHECK-NEXT: clz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %op) ret <8 x i16> %res } @@ -490,6 +2350,96 @@ define void @cttz_v16i16(ptr %a) { ; CHECK-NEXT: clz z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; 
NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, 
[sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x10000 +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %op) store <16 x i16> %res, ptr %a @@ -505,6 +2455,21 @@ define <2 x i32> @cttz_v2i32(<2 x i32> %op) { ; CHECK-NEXT: clz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %op) ret <2 x i32> %res } @@ -518,6 +2483,28 @@ define <4 x i32> @cttz_v4i32(<4 x i32> %op) { ; CHECK-NEXT: clz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %op) ret <4 x i32> %res } @@ -533,6 +2520,44 @@ define void @cttz_v8i32(ptr %a) { ; CHECK-NEXT: clz z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: clz w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %op) store <8 x i32> %res, ptr %a @@ -548,6 +2573,18 @@ define <1 x i64> @cttz_v1i64(<1 x i64> %op) { ; CHECK-NEXT: clz z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: rbit x8, x8 +; NONEON-NOSVE-NEXT: clz x8, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %op) ret <1 x i64> %res } @@ -561,6 +2598,21 @@ define 
<2 x i64> @cttz_v2i64(<2 x i64> %op) { ; CHECK-NEXT: clz z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: rbit x8, x8 +; NONEON-NOSVE-NEXT: clz x9, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: rbit x8, x8 +; NONEON-NOSVE-NEXT: clz x8, x8 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %op) ret <2 x i64> %res } @@ -576,6 +2628,30 @@ define void @cttz_v4i64(ptr %a) { ; CHECK-NEXT: clz z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: rbit x8, x8 +; NONEON-NOSVE-NEXT: clz x9, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: rbit x8, x8 +; NONEON-NOSVE-NEXT: clz x8, x8 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: rbit x8, x8 +; NONEON-NOSVE-NEXT: clz x9, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: rbit x8, x8 +; NONEON-NOSVE-NEXT: clz x8, x8 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %op) store <4 x i64> %res, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll index 260ad16581f13..41065b3602003 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -11,6 +12,18 @@ define void @bitcast_v4i8(ptr %a, ptr %b) { ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] ; CHECK-NEXT: st1b { z0.h }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldrb w8, [x0] +; NONEON-NOSVE-NEXT: ldrb w9, [x0, #1] +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #2] +; NONEON-NOSVE-NEXT: ldrb w11, [x0, #3] +; NONEON-NOSVE-NEXT: strb w11, [x1, #3] +; NONEON-NOSVE-NEXT: strb w10, [x1, #2] +; 
NONEON-NOSVE-NEXT: strb w9, [x1, #1] +; NONEON-NOSVE-NEXT: strb w8, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <4 x i8>, ptr %a %cast = bitcast <4 x i8> %load to <4 x i8> store volatile <4 x i8> %cast, ptr %b @@ -23,6 +36,12 @@ define void @bitcast_v8i8(ptr %a, ptr %b) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <8 x i8>, ptr %a %cast = bitcast <8 x i8> %load to <8 x i8> store volatile <8 x i8> %cast, ptr %b @@ -35,6 +54,12 @@ define void @bitcast_v16i8(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <16 x i8>, ptr %a %cast = bitcast <16 x i8> %load to <16 x i8> store volatile <16 x i8> %cast, ptr %b @@ -49,6 +74,14 @@ define void @bitcast_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: str q1, [x1, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [x1, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <32 x i8>, ptr %a %cast = bitcast <32 x i8> %load to <32 x i8> store volatile <32 x i8> %cast, ptr %b @@ -72,6 +105,26 @@ define void @bitcast_v2i16(ptr %a, ptr %b) { ; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrh w8, [x0, #2] +; NONEON-NOSVE-NEXT: str w8, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w8, [x0] +; NONEON-NOSVE-NEXT: 
str w8, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w9, [sp, #18] +; NONEON-NOSVE-NEXT: strh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: str d0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: str w8, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %load = load volatile <2 x i16>, ptr %a %cast = bitcast <2 x i16> %load to <2 x half> store volatile <2 x half> %cast, ptr %b @@ -84,6 +137,12 @@ define void @bitcast_v4i16(ptr %a, ptr %b) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <4 x i16>, ptr %a %cast = bitcast <4 x i16> %load to <4 x half> store volatile <4 x half> %cast, ptr %b @@ -96,6 +155,12 @@ define void @bitcast_v8i16(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <8 x i16>, ptr %a %cast = bitcast <8 x i16> %load to <8 x half> store volatile <8 x half> %cast, ptr %b @@ -110,6 +175,14 @@ define void @bitcast_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: str q1, [x1, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [x1, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <16 x i16>, ptr %a %cast = bitcast <16 x i16> %load to <16 x half> store volatile <16 x half> %cast, ptr %b @@ -122,6 +195,12 @@ define void @bitcast_v2i32(ptr %a, ptr %b) { ; 
CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <2 x i32>, ptr %a %cast = bitcast <2 x i32> %load to <2 x float> store volatile <2 x float> %cast, ptr %b @@ -134,6 +213,12 @@ define void @bitcast_v4i32(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <4 x i32>, ptr %a %cast = bitcast <4 x i32> %load to <4 x float> store volatile <4 x float> %cast, ptr %b @@ -148,6 +233,14 @@ define void @bitcast_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: str q1, [x1, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [x1, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <8 x i32>, ptr %a %cast = bitcast <8 x i32> %load to <8 x float> store volatile <8 x float> %cast, ptr %b @@ -160,6 +253,12 @@ define void @bitcast_v1i64(ptr %a, ptr %b) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <1 x i64>, ptr %a %cast = bitcast <1 x i64> %load to <1 x double> store volatile <1 x double> %cast, ptr %b @@ -172,6 +271,12 @@ define void @bitcast_v2i64(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; 
NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <2 x i64>, ptr %a %cast = bitcast <2 x i64> %load to <2 x double> store volatile <2 x double> %cast, ptr %b @@ -186,6 +291,14 @@ define void @bitcast_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: str q1, [x1, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [x1, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <4 x i64>, ptr %a %cast = bitcast <4 x i64> %load to <4 x double> store volatile <4 x double> %cast, ptr %b diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll index 9a07bd8bd5ac9..b908dd61f2401 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64" @@ -30,6 +31,43 @@ define <8 x i32> @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %r ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fixed_bitselect_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x1] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x2] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: neg w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: neg w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: neg w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: neg w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: neg w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: neg w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: neg w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: neg w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: bsl v0.16b, v3.16b, v5.16b +; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v4.16b +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %pre_cond = load <8 x i32>, ptr %pre_cond_ptr %left = load <8 x i32>, ptr %left_ptr %right = load <8 x i32>, ptr %right_ptr diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll index aec434b4819d7..318a9cf7d738b 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = 
"aarch64-unknown-linux-gnu" @@ -10,6 +11,12 @@ define void @build_vector_7_inc1_v4i1(ptr %a) { ; CHECK-NEXT: mov w8, #5 // =0x5 ; CHECK-NEXT: strb w8, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_7_inc1_v4i1: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: strb w8, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x i1> , ptr %a, align 1 ret void } @@ -23,6 +30,15 @@ define void @build_vector_7_inc1_v32i8(ptr %a) { ; CHECK-NEXT: add z1.b, z1.b, #23 // =0x17 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_7_inc1_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI1_0 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI1_1 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI1_0] +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI1_1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <32 x i8> , ptr %a, align 1 ret void } @@ -35,6 +51,15 @@ define void @build_vector_0_inc2_v16i16(ptr %a) { ; CHECK-NEXT: add z0.h, z0.h, #16 // =0x10 ; CHECK-NEXT: str q0, [x0, #16] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_0_inc2_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI2_0 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI2_1 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI2_0] +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI2_1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <16 x i16> , ptr %a, align 2 ret void } @@ -48,6 +73,15 @@ define void @build_vector_0_dec3_v8i32(ptr %a) { ; CHECK-NEXT: add z1.s, z0.s, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_0_dec3_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI3_0 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI3_1 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI3_0] +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI3_1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <8 x i32> , ptr %a, align 4 ret void } @@ 
-64,6 +98,15 @@ define void @build_vector_minus2_dec32_v4i64(ptr %a) { ; CHECK-NEXT: add z0.d, z0.d, z2.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_minus2_dec32_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI4_0 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI4_1 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI4_0] +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI4_1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x i64> , ptr %a, align 8 ret void } @@ -76,6 +119,15 @@ define void @build_vector_no_stride_v4i64(ptr %a) { ; CHECK-NEXT: index z1.d, #0, #4 ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_no_stride_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI5_0 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI5_1 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI5_0] +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI5_1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x i64> , ptr %a, align 8 ret void } @@ -89,6 +141,15 @@ define void @build_vector_0_inc2_v16f16(ptr %a) { ; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI6_1] ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_0_inc2_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI6_0 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI6_1 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI6_0] +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI6_1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <16 x half> , ptr %a, align 2 ret void } @@ -103,6 +164,15 @@ define void @build_vector_0_dec3_v8f32(ptr %a) { ; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI7_1] ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_0_dec3_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI7_0 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI7_1 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI7_0] +; NONEON-NOSVE-NEXT: ldr 
q1, [x9, :lo12:.LCPI7_1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <8 x float> , ptr %a, align 4 ret void } @@ -117,6 +187,15 @@ define void @build_vector_minus2_dec32_v4f64(ptr %a) { ; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI8_1] ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_minus2_dec32_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI8_0 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI8_1 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI8_0] +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI8_1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x double> , ptr %a, align 8 ret void } @@ -131,6 +210,15 @@ define void @build_vector_no_stride_v4f64(ptr %a) { ; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI9_1] ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_no_stride_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI9_0 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI9_1 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI9_0] +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI9_1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x double> , ptr %a, align 8 ret void } diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll index 82e75d6efda35..a845c3cbdc2b6 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -40,6 +41,31 @@ define <8 x i8> @concat_v8i8(<4 x i8> 
%op1, <4 x i8> %op2) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = shufflevector <4 x i8> %op1, <4 x i8> %op2, <8 x i32> ret <8 x i8> %res } @@ -53,6 +79,13 @@ define <16 x i8> @concat_v16i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 +; NONEON-NOSVE-NEXT: ret %res = shufflevector <8 x i8> %op1, <8 x i8> %op2, <16 x i32> ret <16 x i8> %res @@ -65,6 +98,13 @@ define void @concat_v32i8(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i8>, ptr %a %op2 = load <16 x i8>, ptr %b %res = shufflevector <16 x i8> %op1, <16 x i8> %op2, <32 x i32> , ptr %a %op2 = load <32 x i8>, ptr %b %res = shufflevector <32 x i8> %op1, <32 x i8> %op2, <64 x i32> @concat_v4i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: strh w9, [sp, #30] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w9, [sp, #26] +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = shufflevector <2 x i16> %op1, <2 x i16> %op2, <4 x i32> ret <4 x i16> %res } @@ -135,6 +198,13 @@ define <8 x i16> @concat_v8i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 +; NONEON-NOSVE-NEXT: ret %res = shufflevector <4 x i16> %op1, <4 x i16> %op2, <8 x i32> ret <8 x i16> %res } @@ -146,6 +216,13 @@ define void @concat_v16i16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %op2 = load <8 x i16>, ptr %b %res = shufflevector <8 x i16> %op1, <8 x i16> %op2, <16 x i32> , ptr %a %op2 = load <16 x i16>, ptr %b %res = shufflevector <16 x i16> %op1, <16 x i16> %op2, <32 x i32> @concat_v2i32(<1 x i32> %op1, <1 x i32> %op2) { ; CHECK-NEXT: zip1 z0.s, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = shufflevector <1 x i32> %op1, <1 x i32> %op2, <2 x i32> ret <2 x i32> %res } @@ -199,6 +296,13 @@ define <4 x i32> @concat_v4i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 +; NONEON-NOSVE-NEXT: ret %res = shufflevector <2 x i32> %op1, <2 x i32> %op2, <4 x i32> ret <4 x i32> %res } @@ -210,6 +314,13 @@ define void @concat_v8i32(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i32>, ptr %a %op2 = load <4 x i32>, ptr %b %res = shufflevector <4 x i32> %op1, <4 x i32> %op2, <8 x i32> @@ -225,6 +336,14 @@ define void @concat_v16i32(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: stp q0, q1, [x2, #32] ; CHECK-NEXT: stp q3, q2, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32] +; NONEON-NOSVE-NEXT: stp q3, q2, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = shufflevector <8 x i32> %op1, <8 x i32> %op2, <16 x i32> @concat_v2i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 +; NONEON-NOSVE-NEXT: ret %res = shufflevector <1 x i64> %op1, <1 x i64> %op2, <2 x i32> ret <2 x i64> %res } @@ -258,6 +384,13 @@ define void @concat_v4i64(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x i64>, ptr %a %op2 = load <2 x i64>, ptr %b %res = shufflevector <2 x i64> %op1, <2 x i64> %op2, <4 x i32> @@ -273,6 +406,14 @@ define void @concat_v8i64(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: stp q0, q1, [x2, #32] ; CHECK-NEXT: stp q3, q2, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32] +; NONEON-NOSVE-NEXT: stp q3, q2, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = shufflevector <4 x i64> %op1, <4 x i64> %op2, <8 x i32> @@ -300,6 +441,18 @@ define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = shufflevector <2 x half> %op1, <2 x half> %op2, <4 x i32> ret <4 x half> %res } @@ -313,6 +466,13 @@ define <8 x half> @concat_v8f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: splice z0.h, p0, 
z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 +; NONEON-NOSVE-NEXT: ret %res = shufflevector <4 x half> %op1, <4 x half> %op2, <8 x i32> ret <8 x half> %res } @@ -324,6 +484,13 @@ define void @concat_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %op2 = load <8 x half>, ptr %b %res = shufflevector <8 x half> %op1, <8 x half> %op2, <16 x i32> , ptr %a %op2 = load <16 x half>, ptr %b %res = shufflevector <16 x half> %op1, <16 x half> %op2, <32 x i32> @concat_v2f32(<1 x float> %op1, <1 x float> %op2) { ; CHECK-NEXT: zip1 z0.s, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = shufflevector <1 x float> %op1, <1 x float> %op2, <2 x i32> ret <2 x float> %res } @@ -377,6 +564,13 @@ define <4 x float> @concat_v4f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 +; NONEON-NOSVE-NEXT: ret %res = shufflevector <2 x float> %op1, <2 x float> %op2, <4 x i32> ret <4 x float> %res } @@ -388,6 +582,13 @@ define void @concat_v8f32(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x float>, ptr %a %op2 = load <4 x float>, ptr %b %res = shufflevector <4 x float> %op1, <4 x float> %op2, <8 x i32> @@ -403,6 +604,14 @@ define void @concat_v16f32(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: stp q0, q1, [x2, #32] ; CHECK-NEXT: stp q3, q2, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v16f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32] +; NONEON-NOSVE-NEXT: stp q3, q2, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = shufflevector <8 x float> %op1, <8 x float> %op2, <16 x i32> @concat_v2f64(<1 x double> %op1, <1 x double> %op2) { ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 +; NONEON-NOSVE-NEXT: ret %res = shufflevector <1 x double> %op1, <1 x double> %op2, <2 x i32> ret <2 x double> %res } @@ -436,6 +652,13 @@ define void @concat_v4f64(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x double>, ptr %a %op2 = load <2 x double>, ptr %b %res = shufflevector <2 x double> %op1, <2 x double> %op2, <4 x i32> @@ -451,6 +674,14 @@ define void @concat_v8f64(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: stp q0, q1, [x2, #32] ; CHECK-NEXT: stp q3, q2, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v8f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32] +; NONEON-NOSVE-NEXT: stp q3, q2, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %res = shufflevector <4 x double> %op1, <4 x double> %op2, <8 x i32> @@ -468,6 +699,12 @@ define void @concat_v32i8_undef(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v32i8_undef: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i8>, ptr %a %res = shufflevector <16 x i8> %op1, <16 x i8> undef, <32 x i32> , ptr %a %res = shufflevector <8 x i16> %op1, <8 x i16> undef, <16 x i32> @@ -496,6 +739,12 @@ define void @concat_v8i32_undef(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v8i32_undef: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, 
[x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i32>, ptr %a %res = shufflevector <4 x i32> %op1, <4 x i32> undef, <8 x i32> store <8 x i32> %res, ptr %b @@ -508,6 +757,12 @@ define void @concat_v4i64_undef(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v4i64_undef: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x i64>, ptr %a %res = shufflevector <2 x i64> %op1, <2 x i64> undef, <4 x i32> store <4 x i64> %res, ptr %b @@ -524,6 +779,16 @@ define void @concat_v32i8_4op(ptr %a, ptr %b) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v32i8_4op: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr q0, [sp] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i8>, ptr %a %shuffle = shufflevector <8 x i8> %op1, <8 x i8> undef, <16 x i32> @@ -541,6 +806,16 @@ define void @concat_v16i16_4op(ptr %a, ptr %b) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v16i16_4op: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr q0, [sp] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i16>, ptr %a %shuffle = shufflevector <4 x i16> %op1, <4 x i16> undef, <8 x i32> %res = shufflevector <8 x i16> %shuffle, <8 x i16> undef, <16 x i32> , ptr %a %shuffle = shufflevector <2 x i32> %op1, <2 x i32> undef, <4 x i32> %res = shufflevector <4 x i32> %shuffle, <4 x i32> undef, <8 x i32> @@ -568,6 +853,16 @@ define void @concat_v4i64_4op(ptr %a, ptr %b) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v4i64_4op: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr q0, [sp] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <1 x i64>, ptr %a %shuffle = shufflevector <1 x i64> %op1, <1 x i64> undef, <2 x i32> %res = shufflevector <2 x i64> %shuffle, <2 x i64> undef, <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll index 040e5861e9810..2cdd4374a56c5 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -11,6 +12,32 @@ define <8 x i16> @load_zext_v8i8i16(ptr %ap) { ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 
killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_zext_v8i8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: strh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: strh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %a = load <8 x i8>, ptr %ap %val = zext <8 x i8> %a to <8 x i16> ret <8 x i16> %val @@ -23,6 +50,22 @@ define <4 x i32> @load_zext_v4i16i32(ptr %ap) { ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_zext_v4i16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %a = load <4 x i16>, ptr %ap %val = zext <4 x i16> %a to <4 x i32> ret <4 x i32> %val @@ -35,6 +78,19 @@ define <2 x i64> 
@load_zext_v2i32i64(ptr %ap) { ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_zext_v2i32i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #24] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %a = load <2 x i32>, ptr %ap %val = zext <2 x i32> %a to <2 x i64> ret <2 x i64> %val @@ -54,6 +110,20 @@ define <2 x i256> @load_zext_v2i64i256(ptr %ap) { ; CHECK-NEXT: mov x7, xzr ; CHECK-NEXT: fmov x4, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_zext_v2i64i256: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp x0, x4, [sp], #16 +; NONEON-NOSVE-NEXT: mov x1, xzr +; NONEON-NOSVE-NEXT: mov x2, xzr +; NONEON-NOSVE-NEXT: mov x3, xzr +; NONEON-NOSVE-NEXT: mov x5, xzr +; NONEON-NOSVE-NEXT: mov x6, xzr +; NONEON-NOSVE-NEXT: mov x7, xzr +; NONEON-NOSVE-NEXT: ret %a = load <2 x i64>, ptr %ap %val = zext <2 x i64> %a to <2 x i256> ret <2 x i256> %val @@ -75,6 +145,79 @@ define <16 x i32> @load_sext_v16i8i32(ptr %ap) { ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_sext_v16i8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #160 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27] +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; 
NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26] +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25] +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24] +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31] +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30] +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29] +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28] +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #94] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #90] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #88] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #86] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #84] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #82] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #80] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #78] +; NONEON-NOSVE-NEXT: 
ldrsh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #128] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #74] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #72] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #70] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #68] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #66] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #64] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: ret %a = load <16 x i8>, ptr %ap %val = sext <16 x i8> %a to <16 x i32> ret <16 x i32> %val @@ -90,6 +233,29 @@ define <8 x i32> @load_sext_v8i16i32(ptr %ap) { ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_sext_v8i16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #18] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %a = load <8 x i16>, ptr %ap %val = sext <8 x i16> %a to <8 x i32> ret <8 x i32> %val @@ -121,6 +287,36 @@ define <4 x i256> @load_sext_v4i32i256(ptr %ap) { ; CHECK-NEXT: stp x12, x12, [x8, #112] ; CHECK-NEXT: stp x11, x12, [x8, #96] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_sext_v4i32i256: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-96]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldpsw x9, x10, [sp, #24] +; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #48] +; NONEON-NOSVE-NEXT: ldpsw x9, x10, [sp, #16] +; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: ldp x11, x9, [sp, #64] +; NONEON-NOSVE-NEXT: ldp x12, x13, [sp, #80] +; NONEON-NOSVE-NEXT: asr x10, x9, #63 +; NONEON-NOSVE-NEXT: asr x14, x11, #63 +; NONEON-NOSVE-NEXT: stp x10, x10, [x8, #112] +; NONEON-NOSVE-NEXT: stp x9, x10, [x8, #96] +; NONEON-NOSVE-NEXT: asr x9, x13, #63 +; NONEON-NOSVE-NEXT: asr x10, x12, #63 +; NONEON-NOSVE-NEXT: stp x14, x14, [x8, #80] +; NONEON-NOSVE-NEXT: stp x11, x14, [x8, #64] +; NONEON-NOSVE-NEXT: stp x9, x9, [x8, #48] +; NONEON-NOSVE-NEXT: stp x13, x9, [x8, #32] +; NONEON-NOSVE-NEXT: stp x10, x10, [x8, #16] +; NONEON-NOSVE-NEXT: stp x12, x10, [x8] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %a = load <4 x i32>, ptr %ap %val = sext <4 x i32> %a to <4 x i256> ret <4 x i256> %val @@ -154,6 +350,30 @@ define <2 x i256> @load_sext_v2i64i256(ptr %ap) { ; CHECK-NEXT: fmov x1, d6 ; CHECK-NEXT: fmov x5, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_sext_v2i64i256: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #144 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 144 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldp x8, x10, [sp] +; NONEON-NOSVE-NEXT: asr x9, x8, #63 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: asr x8, x10, #63 +; NONEON-NOSVE-NEXT: stp x9, x9, [sp, #16] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #16] +; NONEON-NOSVE-NEXT: stp x10, x8, [sp, #64] +; NONEON-NOSVE-NEXT: stp x8, x8, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] +; NONEON-NOSVE-NEXT: ldp x0, x1, [sp, 
#96] +; NONEON-NOSVE-NEXT: ldp x2, x3, [sp, #80] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #112] +; NONEON-NOSVE-NEXT: ldp x4, x5, [sp, #128] +; NONEON-NOSVE-NEXT: ldp x6, x7, [sp, #112] +; NONEON-NOSVE-NEXT: add sp, sp, #144 +; NONEON-NOSVE-NEXT: ret %a = load <2 x i64>, ptr %ap %val = sext <2 x i64> %a to <2 x i256> ret <2 x i256> %val @@ -187,6 +407,92 @@ define <16 x i64> @load_zext_v16i16i64(ptr %ap) { ; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6 ; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_zext_v16i16i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #336 +; NONEON-NOSVE-NEXT: str x29, [sp, #320] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 336 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: str wzr, [sp, #316] +; NONEON-NOSVE-NEXT: str wzr, [sp, #308] +; NONEON-NOSVE-NEXT: ldr x29, [sp, #320] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: str wzr, [sp, #300] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: str wzr, [sp, #292] +; NONEON-NOSVE-NEXT: str wzr, [sp, #284] +; NONEON-NOSVE-NEXT: str wzr, [sp, #276] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #40] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; NONEON-NOSVE-NEXT: str wzr, [sp, #268] +; NONEON-NOSVE-NEXT: str wzr, [sp, #260] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #56] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #88] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #44] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] 
+; NONEON-NOSVE-NEXT: ldrh w9, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #64] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #152] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #72] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #68] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #136] +; NONEON-NOSVE-NEXT: ldp d2, d1, [sp, #120] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #160] +; NONEON-NOSVE-NEXT: str d1, [sp, #328] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #104] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #248] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #240] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #152] +; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #176] +; NONEON-NOSVE-NEXT: str d0, [sp, #168] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #232] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #224] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #144] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #224] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #216] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #208] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #136] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #192] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #332] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #200] +; NONEON-NOSVE-NEXT: str w8, [sp, #312] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #328] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #192] +; NONEON-NOSVE-NEXT: str w8, [sp, #304] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #184] +; NONEON-NOSVE-NEXT: str w9, [sp, #296] +; NONEON-NOSVE-NEXT: str w8, [sp, #288] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #176] +; NONEON-NOSVE-NEXT: ldp q5, q4, [sp, #288] +; NONEON-NOSVE-NEXT: str w9, [sp, #280] +; NONEON-NOSVE-NEXT: str w8, [sp, #272] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, 
#168] +; NONEON-NOSVE-NEXT: str w9, [sp, #264] +; NONEON-NOSVE-NEXT: str w8, [sp, #256] +; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #256] +; NONEON-NOSVE-NEXT: add sp, sp, #336 +; NONEON-NOSVE-NEXT: ret %a = load <16 x i16>, ptr %ap %val = zext <16 x i16> %a to <16 x i64> ret <16 x i64> %val diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll index 45a804becbc55..b7b34cfa1517c 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -27,6 +28,22 @@ define <4 x i1> @extract_subvector_v8i1(<8 x i1> %op) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v8i1: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: strh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: strh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = call <4 x i1> @llvm.vector.extract.v4i1.v8i1(<8 x i1> %op, i64 4) ret <4 x i1> %ret } @@ -54,6 +71,22 @@ define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: strh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: strh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = call <4 x i8> @llvm.vector.extract.v4i8.v8i8(<8 x i8> %op, i64 4) ret <4 x i8> %ret } @@ -65,6 +98,14 @@ define <8 x i8> @extract_subvector_v16i8(<16 x i8> %op) { ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = call <8 x i8> @llvm.vector.extract.v8i8.v16i8(<16 x i8> %op, i64 8) ret <8 x i8> %ret } @@ -75,6 +116,12 @@ define void @extract_subvector_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %ret = call <16 x i8> @llvm.vector.extract.v16i8.v32i8(<32 x i8> %op, i64 16) store <16 x i8> %ret, ptr %b @@ -91,6 +138,15 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) { ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = call <2 x i16> @llvm.vector.extract.v2i16.v4i16(<4 x i16> %op, i64 2) ret <2 x i16> %ret } @@ -102,6 +158,14 @@ define <4 x i16> @extract_subvector_v8i16(<8 x i16> %op) { ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = call <4 x i16> @llvm.vector.extract.v4i16.v8i16(<8 x i16> %op, i64 4) ret <4 x i16> %ret } @@ -112,6 +176,12 @@ define void @extract_subvector_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %ret = call <8 x i16> @llvm.vector.extract.v8i16.v16i16(<16 x i16> %op, i64 8) store <8 x i16> %ret, ptr %b @@ -127,6 +197,16 @@ define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) { ; CHECK-NEXT: mov z0.s, z0.s[1] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: str w8, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp], #16 +; NONEON-NOSVE-NEXT: ret %ret = call <1 x i32> @llvm.vector.extract.v1i32.v2i32(<2 x i32> %op, i64 1) ret <1 x i32> %ret } @@ -138,6 +218,14 @@ define <2 x i32> @extract_subvector_v4i32(<4 x i32> %op) { ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %op, i64 2) ret <2 x i32> %ret } @@ -148,6 +236,12 @@ define void @extract_subvector_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %ret = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> %op, i64 4) store <4 x i32> %ret, ptr %b @@ -163,6 +257,14 @@ define <1 x i64> @extract_subvector_v2i64(<2 x i64> %op) { ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = call <1 x i64> @llvm.vector.extract.v1i64.v2i64(<2 x i64> %op, i64 1) ret <1 x i64> %ret } @@ -173,6 +275,12 @@ define void @extract_subvector_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %ret = call <2 x i64> @llvm.vector.extract.v2i64.v4i64(<4 x i64> %op, i64 2) store <2 x i64> %ret, ptr %b @@ -190,6 +298,16 @@ define <2 x half> @extract_subvector_v4f16(<4 x half> %op) { ; CHECK-NEXT: tbl z0.h, { z0.h }, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: str w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = call <2 x half> @llvm.vector.extract.v2f16.v4f16(<4 x half> %op, i64 2) ret <2 x half> %ret } @@ -201,6 +319,14 @@ define <4 x half> @extract_subvector_v8f16(<8 x half> %op) { ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = call <4 x half> @llvm.vector.extract.v4f16.v8f16(<8 x half> %op, i64 4) ret <4 x half> %ret } @@ -211,6 +337,12 @@ define void @extract_subvector_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %ret = call <8 x half> @llvm.vector.extract.v8f16.v16f16(<16 x half> %op, i64 8) store <8 x half> %ret, ptr %b @@ -226,6 +358,16 @@ define <1 x float> @extract_subvector_v2f32(<2 x float> %op) { ; CHECK-NEXT: mov z0.s, z0.s[1] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: str w8, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp], #16 +; NONEON-NOSVE-NEXT: ret %ret = call <1 x float> @llvm.vector.extract.v1f32.v2f32(<2 x float> %op, i64 1) ret <1 x float> %ret } @@ -237,6 +379,14 @@ define <2 x float> @extract_subvector_v4f32(<4 x float> %op) { ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> %op, i64 2) ret <2 x float> %ret } @@ -247,6 +397,12 @@ define void @extract_subvector_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %ret = call <4 x float> @llvm.vector.extract.v4f32.v8f32(<8 x float> %op, i64 4) store <4 x float> %ret, ptr %b @@ -262,6 +418,14 @@ define <1 x double> @extract_subvector_v2f64(<2 x double> %op) { ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = call <1 x double> @llvm.vector.extract.v1f64.v2f64(<2 x double> %op, i64 1) ret <1 x double> %ret } @@ -272,6 +436,12 @@ define void @extract_subvector_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %ret = call <2 x double> @llvm.vector.extract.v2f64.v4f64(<4 x double> %op, i64 2) store <2 x double> %ret, ptr %b diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll index 9c3b5e14289dc..0a1831a94d8fe 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -15,6 +16,15 @@ define half @extractelement_v2f16(<2 x half> %op1) { ; CHECK-NEXT: mov z0.h, z0.h[1] ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %r = extractelement <2 x half> 
%op1, i64 1 ret half %r } @@ -26,6 +36,15 @@ define half @extractelement_v4f16(<4 x half> %op1) { ; CHECK-NEXT: mov z0.h, z0.h[3] ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %r = extractelement <4 x half> %op1, i64 3 ret half %r } @@ -37,6 +56,14 @@ define half @extractelement_v8f16(<8 x half> %op1) { ; CHECK-NEXT: mov z0.h, z0.h[7] ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %r = extractelement <8 x half> %op1, i64 7 ret half %r } @@ -48,6 +75,15 @@ define half @extractelement_v16f16(ptr %a) { ; CHECK-NEXT: mov z0.h, z0.h[7] ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %r = extractelement <16 x half> %op1, i64 15 ret half %r @@ -60,6 +96,15 @@ define float @extractelement_v2f32(<2 x float> %op1) { ; CHECK-NEXT: mov z0.s, z0.s[1] ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %r = extractelement <2 x float> %op1, i64 1 ret float %r } @@ -71,6 +116,14 @@ define float @extractelement_v4f32(<4 x float> %op1) { ; CHECK-NEXT: mov z0.s, z0.s[3] ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %r = extractelement <4 x float> %op1, i64 3 ret float %r } @@ -82,6 +135,15 @@ define float @extractelement_v8f32(ptr %a) { ; CHECK-NEXT: mov z0.s, z0.s[3] ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %r = extractelement <8 x float> %op1, i64 7 ret float %r @@ -91,6 +153,10 @@ define double @extractelement_v1f64(<1 x double> %op1) { ; CHECK-LABEL: extractelement_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ret %r = extractelement <1 x double> %op1, i64 0 ret double %r } @@ -101,6 +167,14 @@ define double @extractelement_v2f64(<2 x double> %op1) { ; CHECK-NEXT: mov z0.d, z0.d[1] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %r = extractelement <2 x double> %op1, i64 1 ret double %r } @@ -112,6 +186,15 @@ define double @extractelement_v4f64(ptr %a) { ; CHECK-NEXT: mov z0.d, z0.d[1] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %r = extractelement <4 x double> %op1, i64 3 ret double %r diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll index 21ce689f68e23..a8d01ec7ce0b4 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll @@ -2,6 +2,7 @@ ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE ; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -28,6 +29,62 @@ define void @test_copysign_v4f16_v4f16(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d ; SVE2-NEXT: str d1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v4f16_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr d0, [x1] +; NONEON-NOSVE-NEXT: ldr d1, [x0] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #38] +; NONEON-NOSVE-NEXT: str h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: str h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #34] +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #32] +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 
+; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %a = load <4 x half>, ptr %ap %b = load <4 x half>, ptr %bp %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b) @@ -54,6 +111,106 @@ define void @test_copysign_v8f16_v8f16(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d ; SVE2-NEXT: str q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v8f16_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; 
NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: str h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: str h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: str h0, [sp, #4] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: str h0, [sp] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #78] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #76] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #74] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #72] +; NONEON-NOSVE-NEXT: ldr 
h0, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #70] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #68] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #66] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #64] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #64] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %a = load <8 x half>, ptr %ap %b = load <8 x half>, ptr %bp %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b) @@ -84,6 +241,195 @@ define void @test_copysign_v16f16_v16f16(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z3.d, z3.d, z4.d, z0.d ; SVE2-NEXT: stp q2, q3, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v16f16_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #160 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #96] +; NONEON-NOSVE-NEXT: ldr 
h0, [sp, #126] +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #124] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #122] +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: str h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #120] +; NONEON-NOSVE-NEXT: str h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #118] +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #116] +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #114] +; NONEON-NOSVE-NEXT: str h0, [sp, #4] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #112] +; NONEON-NOSVE-NEXT: str h0, [sp] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #94] +; NONEON-NOSVE-NEXT: str h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #92] +; NONEON-NOSVE-NEXT: str h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #90] +; NONEON-NOSVE-NEXT: str h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #88] +; NONEON-NOSVE-NEXT: str h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #86] +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #84] +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #82] +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #80] +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #110] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #158] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #108] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: 
fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #156] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #106] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #154] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #104] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #152] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #102] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #150] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #100] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #148] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #98] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #146] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #96] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, 
#57] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #144] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #78] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #142] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #76] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #140] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #74] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #45] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #138] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #72] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #136] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #70] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #37] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #134] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #68] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; 
NONEON-NOSVE-NEXT: ldrb w8, [sp, #33] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #132] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #66] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #130] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #64] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: ret %a = load <16 x half>, ptr %ap %b = load <16 x half>, ptr %bp %r = call <16 x half> @llvm.copysign.v16f16(<16 x half> %a, <16 x half> %b) @@ -112,6 +458,30 @@ define void @test_copysign_v2f32_v2f32(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d ; SVE2-NEXT: str d1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v2f32_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ldr d1, [x1] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: tst w9, #0x80000000 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: tst w8, #0x80000000 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; 
NONEON-NOSVE-NEXT: ret %a = load <2 x float>, ptr %ap %b = load <2 x float>, ptr %bp %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) @@ -138,6 +508,41 @@ define void @test_copysign_v4f32_v4f32(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d ; SVE2-NEXT: str q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v4f32_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: tst w9, #0x80000000 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: tst w8, #0x80000000 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w9, #0x80000000 +; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: tst w8, #0x80000000 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %a = load <4 x float>, ptr %ap %b = load <4 x float>, ptr %bp %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) @@ -168,6 +573,67 @@ define void @test_copysign_v8f32_v8f32(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z3.d, z3.d, z4.d, z0.d ; SVE2-NEXT: stp q2, q3, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
test_copysign_v8f32_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q2, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q1, [x1] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #44] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: tst w9, #0x80000000 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr s0, [sp, #40] +; NONEON-NOSVE-NEXT: tst w8, #0x80000000 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w9, #0x80000000 +; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #88] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #36] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr s0, [sp, #32] +; NONEON-NOSVE-NEXT: tst w8, #0x80000000 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w9, #0x80000000 +; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #80] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: tst w8, #0x80000000 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w9, #0x80000000 +; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #72] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; 
NONEON-NOSVE-NEXT: tst w8, #0x80000000 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %a = load <8 x float>, ptr %ap %b = load <8 x float>, ptr %bp %r = call <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b) @@ -196,6 +662,29 @@ define void @test_copysign_v2f64_v2f64(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d ; SVE2-NEXT: str q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v2f64_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: fabs d0, d0 +; NONEON-NOSVE-NEXT: tst x9, #0x8000000000000000 +; NONEON-NOSVE-NEXT: fneg d1, d0 +; NONEON-NOSVE-NEXT: fcsel d2, d1, d0, ne +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000 +; NONEON-NOSVE-NEXT: fabs d0, d0 +; NONEON-NOSVE-NEXT: fneg d1, d0 +; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne +; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %a = load <2 x double>, ptr %ap %b = load <2 x double>, ptr %bp %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) @@ -226,6 +715,43 @@ define void @test_copysign_v4f64_v4f64(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z3.d, z3.d, z4.d, z0.d ; SVE2-NEXT: stp q2, q3, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v4f64_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: 
ldp q2, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q1, [x1] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: fabs d0, d0 +; NONEON-NOSVE-NEXT: tst x9, #0x8000000000000000 +; NONEON-NOSVE-NEXT: fneg d1, d0 +; NONEON-NOSVE-NEXT: fcsel d2, d1, d0, ne +; NONEON-NOSVE-NEXT: ldr d0, [sp, #32] +; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: fabs d0, d0 +; NONEON-NOSVE-NEXT: fneg d1, d0 +; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne +; NONEON-NOSVE-NEXT: tst x9, #0x8000000000000000 +; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #80] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: fabs d0, d0 +; NONEON-NOSVE-NEXT: fneg d1, d0 +; NONEON-NOSVE-NEXT: fcsel d2, d1, d0, ne +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000 +; NONEON-NOSVE-NEXT: fabs d0, d0 +; NONEON-NOSVE-NEXT: fneg d1, d0 +; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne +; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %a = load <4 x double>, ptr %ap %b = load <4 x double>, ptr %bp %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) @@ -260,6 +786,31 @@ define void @test_copysign_v2f32_v2f64(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d ; SVE2-NEXT: str d2, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v2f32_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr d1, [x0] +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: str d1, [sp, #8] +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; 
NONEON-NOSVE-NEXT: tst x9, #0x8000000000000000 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %a = load <2 x float>, ptr %ap %b = load <2 x double>, ptr %bp %tmp0 = fptrunc <2 x double> %b to <2 x float> @@ -304,6 +855,43 @@ define void @test_copysign_v4f32_v4f64(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d ; SVE2-NEXT: str q2, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v4f32_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ldr q2, [x0] +; NONEON-NOSVE-NEXT: str q2, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: tst x9, #0x8000000000000000 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst x9, #0x8000000000000000 +; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #56] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; 
NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %a = load <4 x float>, ptr %ap %b = load <4 x double>, ptr %bp %tmp0 = fptrunc <4 x double> %b to <4 x float> @@ -337,6 +925,31 @@ define void @test_copysign_v2f64_v2f32(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d ; SVE2-NEXT: str q2, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v2f64_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: ldr d0, [x1] +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: str d0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: fabs d0, d0 +; NONEON-NOSVE-NEXT: tst w9, #0x80000000 +; NONEON-NOSVE-NEXT: fneg d1, d0 +; NONEON-NOSVE-NEXT: fcsel d2, d1, d0, ne +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: tst w8, #0x80000000 +; NONEON-NOSVE-NEXT: fabs d0, d0 +; NONEON-NOSVE-NEXT: fneg d1, d0 +; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne +; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %a = load <2 x double>, ptr %ap %b = load < 2 x float>, ptr %bp %tmp0 = fpext <2 x float> %b to <2 x double> @@ -381,6 +994,45 @@ define void @test_copysign_v4f64_v4f32(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z4.d, z4.d, z1.d, z2.d ; SVE2-NEXT: stp q3, q4, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v4f64_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: stp q0, q2, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d0, 
d2, [sp, #16] +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: stp d2, d0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: fabs d0, d0 +; NONEON-NOSVE-NEXT: tst w9, #0x80000000 +; NONEON-NOSVE-NEXT: fneg d1, d0 +; NONEON-NOSVE-NEXT: fcsel d2, d1, d0, ne +; NONEON-NOSVE-NEXT: ldr d0, [sp, #32] +; NONEON-NOSVE-NEXT: tst w8, #0x80000000 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: fabs d0, d0 +; NONEON-NOSVE-NEXT: fneg d1, d0 +; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne +; NONEON-NOSVE-NEXT: tst w9, #0x80000000 +; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #64] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: fabs d0, d0 +; NONEON-NOSVE-NEXT: fneg d1, d0 +; NONEON-NOSVE-NEXT: fcsel d2, d1, d0, ne +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: tst w8, #0x80000000 +; NONEON-NOSVE-NEXT: fabs d0, d0 +; NONEON-NOSVE-NEXT: fneg d1, d0 +; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne +; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %a = load <4 x double>, ptr %ap %b = load <4 x float>, ptr %bp %tmp0 = fpext <4 x float> %b to <4 x double> @@ -416,6 +1068,53 @@ define void @test_copysign_v4f16_v4f32(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d ; SVE2-NEXT: str d2, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v4f16_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr d1, [x0] +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: str d1, [sp, #8] +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: tst w9, #0x80000000 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; 
NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80000000 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w9, #0x80000000 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80000000 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %a = load <4 x half>, ptr %ap %b = load <4 x float>, ptr %bp %tmp0 = fptrunc <4 x float> %b to <4 x half> @@ -471,6 +1170,53 @@ define void @test_copysign_v4f16_v4f64(ptr %ap, ptr %bp) { ; SVE2-NEXT: str d5, [x0] ; SVE2-NEXT: add sp, sp, #16 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v4f16_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ldr d2, [x0] +; NONEON-NOSVE-NEXT: str d2, [sp, #8] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: tst x9, #0x8000000000000000 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; 
NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst x9, #0x8000000000000000 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #56] +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %a = load <4 x half>, ptr %ap %b = load <4 x double>, ptr %bp %tmp0 = fptrunc <4 x double> %b to <4 x half> @@ -514,6 +1260,87 @@ define void @test_copysign_v8f16_v8f32(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d ; SVE2-NEXT: str q2, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v8f16_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ldr q2, [x0] +; NONEON-NOSVE-NEXT: str q2, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: tst w9, 
#0x80000000 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80000000 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w9, #0x80000000 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80000000 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w9, #0x80000000 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80000000 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w9, #0x80000000 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; 
NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: tst w8, #0x80000000 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %a = load <8 x half>, ptr %ap %b = load <8 x float>, ptr %bp %tmp0 = fptrunc <8 x float> %b to <8 x half> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll index b0a82e699939f..e84acfc8504a9 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -17,6 +18,43 @@ define <2 x half> @fadd_v2f16(<2 x half> %op1, <2 x half> %op2) { ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; 
NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fadd <2 x half> %op1, %op2 ret <2 x half> %res } @@ -30,6 +68,43 @@ define <4 x half> @fadd_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; 
NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fadd <4 x half> %op1, %op2 ret <4 x half> %res } @@ -43,6 +118,70 @@ define <8 x half> @fadd_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; 
NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = fadd <8 x half> %op1, %op2 ret <8 x half> %res } @@ -58,6 +197,131 @@ define void @fadd_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #94] +; 
NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #92] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #90] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #88] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #86] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #84] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #82] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #80] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, 
h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #78] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #76] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #74] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #72] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #70] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #68] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #66] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = 
load <16 x half>, ptr %b %res = fadd <16 x half> %op1, %op2 @@ -74,6 +338,21 @@ define <2 x float> @fadd_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fadd s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fadd <2 x float> %op1, %op2 ret <2 x float> %res } @@ -87,6 +366,26 @@ define <4 x float> @fadd_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fadd s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fadd s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = fadd <4 x float> %op1, %op2 ret <4 x float> %res } @@ -102,6 +401,43 @@ define void @fadd_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #60] +; NONEON-NOSVE-NEXT: fadd s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #56] +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #32] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #88] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #52] +; NONEON-NOSVE-NEXT: fadd s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #48] +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #80] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fadd s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #72] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fadd s3, s2, s0 
+; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = fadd <8 x float> %op1, %op2 @@ -118,6 +454,20 @@ define <2 x double> @fadd_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fadd d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fadd d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = fadd <2 x double> %op1, %op2 ret <2 x double> %res } @@ -133,6 +483,31 @@ define void @fadd_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #56] +; NONEON-NOSVE-NEXT: fadd d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #48] +; NONEON-NOSVE-NEXT: fadd d0, d1, d0 +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp] +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #80] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fadd d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; 
NONEON-NOSVE-NEXT: fadd d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %res = fadd <4 x double> %op1, %op2 @@ -153,6 +528,43 @@ define <2 x half> @fdiv_v2f16(<2 x half> %op1, <2 x half> %op2) { ; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fdiv_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fdiv <2 x half> %op1, %op2 ret <2 x half> %res } @@ -166,6 +578,43 @@ define <4 x half> @fdiv_v4f16(<4 x half> %op1, <4 x half> 
%op2) { ; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fdiv_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fdiv <4 x half> %op1, %op2 ret <4 x half> %res } @@ -179,6 +628,70 @@ define <8 x half> @fdiv_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fdiv_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt 
s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = fdiv <8 x half> %op1, %op2 ret <8 x half> %res } @@ -194,6 +707,131 @@ define void @fdiv_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fdiv z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fdiv_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #94] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #92] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #90] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #88] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr 
h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #86] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #84] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #82] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #80] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #78] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #76] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #74] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #72] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; 
NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #70] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #68] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #66] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %res = fdiv <16 x half> %op1, %op2 @@ -210,6 +848,21 @@ define <2 x float> @fdiv_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fdiv_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fdiv s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fdiv <2 x float> %op1, %op2 ret <2 x float> %res } @@ -223,6 +876,26 @@ define <4 x 
float> @fdiv_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fdiv_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fdiv s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fdiv s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = fdiv <4 x float> %op1, %op2 ret <4 x float> %res } @@ -238,6 +911,43 @@ define void @fdiv_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fdiv z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fdiv_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #60] +; NONEON-NOSVE-NEXT: fdiv s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #56] +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #32] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #88] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #52] +; NONEON-NOSVE-NEXT: fdiv s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #48] +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #80] +; NONEON-NOSVE-NEXT: ldr s0, [sp, 
#28] +; NONEON-NOSVE-NEXT: fdiv s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #72] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fdiv s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fdiv s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = fdiv <8 x float> %op1, %op2 @@ -254,6 +964,20 @@ define <2 x double> @fdiv_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fdiv_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fdiv d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fdiv d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = fdiv <2 x double> %op1, %op2 ret <2 x double> %res } @@ -269,6 +993,31 @@ define void @fdiv_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fdiv z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fdiv_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #56] +; NONEON-NOSVE-NEXT: fdiv d3, d2, d0 
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #48] +; NONEON-NOSVE-NEXT: fdiv d0, d1, d0 +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp] +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #80] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fdiv d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fdiv d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %res = fdiv <4 x double> %op1, %op2 @@ -290,6 +1039,52 @@ define <2 x half> @fma_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x half> %op3) ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8] +; NONEON-NOSVE-NEXT: str d0, [sp] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; 
NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h2, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.fma.v2f16(<2 x half> %op1, <2 x half> %op2, <2 x half> %op3) ret <2 x half> %res } @@ -304,6 +1099,52 @@ define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3) ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8] +; NONEON-NOSVE-NEXT: str d0, [sp] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 
+; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h2, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.fma.v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3) ret <4 x half> %res } @@ -318,6 +1159,88 @@ define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3) ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: stp q1, q2, [sp, #16] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, 
s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h2, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.fma.v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3) ret <8 x half> %res } @@ -334,6 
+1257,165 @@ define void @fma_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: fmla z1.h, p0/m, z3.h, z4.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #128 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x2] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q4, q5, [x0] +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q4, q2, [sp] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #94] +; NONEON-NOSVE-NEXT: stp q1, q5, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #78] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #62] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #76] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #60] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #126] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #92] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #74] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #58] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #124] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #90] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #72] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #122] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #88] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #70] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #54] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; 
NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #120] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #86] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #68] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #52] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #118] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #84] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #66] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #50] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #116] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #82] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #114] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #80] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #112] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #110] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, 
h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #108] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #106] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #104] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #102] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #100] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h2, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #98] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #96] +; 
NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %op3 = load <16 x half>, ptr %c @@ -352,6 +1434,23 @@ define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %o ; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8] +; NONEON-NOSVE-NEXT: str d0, [sp] +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s2, s4, [sp] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %op3) ret <2 x float> %res } @@ -366,6 +1465,30 @@ define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %o ; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: stp q1, q2, [sp, #16] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #24] +; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #44] +; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #40] +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #16] +; NONEON-NOSVE-NEXT: ldp s2, s4, [sp] +; NONEON-NOSVE-NEXT: stp 
s0, s5, [sp, #56] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #36] +; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #32] +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %op3) ret <4 x float> %res } @@ -382,6 +1505,49 @@ define void @fma_v8f32(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: fmla z1.s, p0/m, z3.s, z4.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #128 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x2] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q4, q5, [x0] +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q4, q2, [sp] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #92] +; NONEON-NOSVE-NEXT: stp q1, q5, [sp, #32] +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #72] +; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #56] +; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #88] +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #48] +; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #120] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #84] +; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #80] +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #24] +; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8] +; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #112] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #44] +; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #40] +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #16] +; NONEON-NOSVE-NEXT: ldp s2, s4, [sp] +; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #104] +; 
NONEON-NOSVE-NEXT: ldr s0, [sp, #36] +; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #32] +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %op3 = load <8 x float>, ptr %c @@ -400,6 +1566,23 @@ define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double ; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: stp q1, q2, [sp, #16] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldp d1, d3, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d2, d4, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: fmadd d5, d4, d3, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #32] +; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d5, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.fma.v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double> %op3) ret <2 x double> %res } @@ -416,6 +1599,35 @@ define void @fma_v4f64(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: fmla z1.d, p0/m, z3.d, z4.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #128 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x2] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q4, q5, [x0] +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q4, q2, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #88] +; NONEON-NOSVE-NEXT: stp q1, q5, 
[sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp d2, d4, [sp, #48] +; NONEON-NOSVE-NEXT: fmadd d5, d4, d3, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #80] +; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0 +; NONEON-NOSVE-NEXT: ldp d1, d3, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d2, d4, [sp] +; NONEON-NOSVE-NEXT: stp d0, d5, [sp, #112] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: fmadd d5, d4, d3, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #32] +; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d5, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %op3 = load <4 x double>, ptr %c @@ -437,6 +1649,43 @@ define <2 x half> @fmul_v2f16(<2 x half> %op1, <2 x half> %op2) { ; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmul_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; 
NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fmul <2 x half> %op1, %op2 ret <2 x half> %res } @@ -450,6 +1699,43 @@ define <4 x half> @fmul_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmul_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fmul <4 x half> %op1, %op2 ret <4 x 
half> %res } @@ -463,6 +1749,70 @@ define <8 x half> @fmul_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmul_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] 
+; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = fmul <8 x half> %op1, %op2 ret <8 x half> %res } @@ -478,6 +1828,131 @@ define void @fmul_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fmul z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmul_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #94] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #92] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #90] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; 
NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #88] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #86] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #84] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #82] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #80] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #78] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #76] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, 
#74] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #72] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #70] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #68] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #66] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %res = fmul <16 x half> %op1, %op2 @@ -494,6 +1969,21 @@ define <2 x float> @fmul_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmul_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fmul s3, s2, s0 +; 
NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fmul <2 x float> %op1, %op2 ret <2 x float> %res } @@ -507,6 +1997,26 @@ define <4 x float> @fmul_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmul_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fmul s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fmul s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = fmul <4 x float> %op1, %op2 ret <4 x float> %res } @@ -522,6 +2032,43 @@ define void @fmul_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fmul z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmul_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #60] +; NONEON-NOSVE-NEXT: fmul s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #56] +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #32] +; 
NONEON-NOSVE-NEXT: stp s0, s3, [sp, #88] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #52] +; NONEON-NOSVE-NEXT: fmul s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #48] +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #80] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fmul s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #72] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fmul s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = fmul <8 x float> %op1, %op2 @@ -538,6 +2085,20 @@ define <2 x double> @fmul_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmul_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fmul d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fmul d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = fmul <2 x double> %op1, %op2 ret <2 x double> %res } @@ -553,6 +2114,31 @@ define void @fmul_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fmul z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmul_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #56] +; NONEON-NOSVE-NEXT: fmul d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #48] +; NONEON-NOSVE-NEXT: fmul d0, d1, d0 +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp] +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #80] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fmul d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fmul d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %res = fmul <4 x double> %op1, %op2 @@ -572,6 +2158,34 @@ define <2 x half> @fneg_v2f16(<2 x half> %op) { ; CHECK-NEXT: fneg z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fneg_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fneg <2 x half> %op ret <2 x half> %res } @@ -584,6 +2198,34 @@ define <4 x half> @fneg_v4f16(<4 x half> %op) { ; CHECK-NEXT: fneg z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fneg_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fneg <4 x half> %op ret <4 x half> %res } @@ -596,6 +2238,54 @@ define <8 x half> @fneg_v8f16(<8 x half> %op) { ; CHECK-NEXT: fneg z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fneg_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fneg <8 x half> %op ret <8 x half> %res } @@ -609,6 +2299,96 @@ define void @fneg_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fneg z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fneg_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, 
[sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; 
NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = fneg <16 x half> %op store <16 x half> %res, ptr %a @@ -623,6 +2403,19 @@ define <2 x float> @fneg_v2f32(<2 x float> %op) { ; CHECK-NEXT: fneg z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fneg_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: fneg s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fneg <2 x float> %op ret <2 x float> %res } @@ -635,6 +2428,24 @@ define <4 x float> @fneg_v4f32(<4 x float> %op) { ; CHECK-NEXT: fneg z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fneg_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: fneg s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: fneg s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fneg <4 x float> %op ret <4 x float> %res } @@ -648,6 +2459,36 @@ define void @fneg_v8f32(ptr %a) { ; CHECK-NEXT: fneg z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fneg_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fneg s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fneg s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: fneg s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: fneg s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: fneg s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = fneg <8 x float> %op store <8 x float> %res, ptr %a @@ -662,6 +2503,19 @@ define <2 x double> @fneg_v2f64(<2 x double> %op) { ; CHECK-NEXT: fneg z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fneg_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: fneg d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: fneg d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fneg <2 x double> %op ret <2 x double> %res } @@ -675,6 +2529,26 @@ define void @fneg_v4f64(ptr %a) { ; CHECK-NEXT: fneg z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fneg_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fneg d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fneg d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: fneg d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: fneg d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = fneg <4 x double> %op store <4 x double> %res, ptr %a @@ -693,6 +2567,34 @@ define <2 x half> @fsqrt_v2f16(<2 x half> %op) { ; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsqrt_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.sqrt.v2f16(<2 x half> %op) ret <2 x half> %res } @@ -705,6 +2607,34 @@ define <4 x half> @fsqrt_v4f16(<4 x half> %op) { ; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsqrt_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.sqrt.v4f16(<4 x half> %op) ret <4 x half> %res } @@ -717,6 +2647,54 @@ define <8 x half> @fsqrt_v8f16(<8 x half> %op) { ; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsqrt_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %op) ret <8 x half> %res } @@ -730,6 +2708,96 @@ define void @fsqrt_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fsqrt z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsqrt_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; 
NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %op) store <16 x half> %res, ptr %a @@ -744,6 +2812,19 @@ define <2 x float> @fsqrt_v2f32(<2 x float> %op) { ; CHECK-NEXT: fsqrt z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsqrt_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: fsqrt s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %op) ret <2 x float> %res } @@ -756,6 +2837,24 @@ define <4 x float> @fsqrt_v4f32(<4 x float> %op) { ; CHECK-NEXT: fsqrt z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsqrt_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: fsqrt s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: fsqrt s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %op) ret <4 x float> %res } @@ -769,6 +2868,36 @@ define void @fsqrt_v8f32(ptr %a) { ; CHECK-NEXT: fsqrt z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsqrt_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fsqrt s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fsqrt s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: fsqrt s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: fsqrt s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %op) store <8 x float> %res, ptr %a @@ -783,6 +2912,19 @@ define <2 x double> @fsqrt_v2f64(<2 x double> %op) { ; CHECK-NEXT: fsqrt z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsqrt_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: fsqrt d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: fsqrt d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %op) ret <2 x double> %res } @@ -796,6 +2938,26 @@ define void @fsqrt_v4f64(ptr %a) { ; CHECK-NEXT: fsqrt z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsqrt_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fsqrt d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fsqrt d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: fsqrt d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: fsqrt d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %op) store <4 x double> %res, ptr %a @@ -815,6 +2977,43 @@ define <2 x half> @fsub_v2f16(<2 x half> %op1, <2 x half> %op2) { ; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsub_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; 
NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fsub <2 x half> %op1, %op2 ret <2 x half> %res } @@ -828,6 +3027,43 @@ define <4 x half> @fsub_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsub_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; 
NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fsub <4 x half> %op1, %op2 ret <4 x half> %res } @@ -841,6 +3077,70 @@ define <8 x half> @fsub_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsub_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; 
NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = fsub <8 x half> %op1, %op2 ret <8 x half> %res } @@ -856,6 +3156,131 @@ define void @fsub_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fsub z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsub_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #94] +; 
NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #92] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #90] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #88] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #86] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #84] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #82] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #80] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, 
h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #78] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #76] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #74] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #72] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #70] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #68] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #66] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = 
load <16 x half>, ptr %b %res = fsub <16 x half> %op1, %op2 @@ -872,6 +3297,21 @@ define <2 x float> @fsub_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsub_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fsub s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fsub <2 x float> %op1, %op2 ret <2 x float> %res } @@ -885,6 +3325,26 @@ define <4 x float> @fsub_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsub_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fsub s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fsub s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = fsub <4 x float> %op1, %op2 ret <4 x float> %res } @@ -900,6 +3360,43 @@ define void @fsub_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fsub z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsub_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #60] +; NONEON-NOSVE-NEXT: fsub s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #56] +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #32] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #88] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #52] +; NONEON-NOSVE-NEXT: fsub s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #48] +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #80] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fsub s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #72] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fsub s3, s2, 
s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fsub s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = fsub <8 x float> %op1, %op2 @@ -916,6 +3413,20 @@ define <2 x double> @fsub_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-NEXT: fsub z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsub_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fsub d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fsub d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = fsub <2 x double> %op1, %op2 ret <2 x double> %res } @@ -931,6 +3442,31 @@ define void @fsub_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fsub z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsub_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #56] +; NONEON-NOSVE-NEXT: fsub d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #48] +; NONEON-NOSVE-NEXT: fsub d0, d1, d0 +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp] +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #80] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fsub d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, 
#16] +; NONEON-NOSVE-NEXT: fsub d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %res = fsub <4 x double> %op1, %op2 @@ -950,6 +3486,34 @@ define <2 x half> @fabs_v2f16(<2 x half> %op) { ; CHECK-NEXT: fabs z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fabs_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.fabs.v2f16(<2 x half> %op) ret <2 x half> %res } @@ -962,6 +3526,34 @@ define <4 x half> @fabs_v4f16(<4 x half> %op) { ; CHECK-NEXT: fabs z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fabs_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.fabs.v4f16(<4 x half> %op) ret <4 x half> %res } @@ -974,6 +3566,54 @@ define <8 x half> @fabs_v8f16(<8 x half> %op) { ; CHECK-NEXT: fabs z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fabs_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.fabs.v8f16(<8 x half> %op) ret <8 x half> %res } @@ -987,6 +3627,96 @@ define void @fabs_v16f16(ptr %a) { ; CHECK-NEXT: fabs z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fabs_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; 
NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; 
NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call <16 x half> @llvm.fabs.v16f16(<16 x half> %op) store <16 x half> %res, ptr %a @@ -1001,6 +3731,19 @@ define <2 x float> @fabs_v2f32(<2 x float> %op) { ; CHECK-NEXT: fabs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fabs_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: fabs s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.fabs.v2f32(<2 x float> %op) ret <2 x float> %res } @@ -1013,6 +3756,24 @@ define <4 x float> @fabs_v4f32(<4 x float> %op) { ; CHECK-NEXT: fabs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fabs_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: fabs s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: fabs s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.fabs.v4f32(<4 x float> %op) ret <4 x float> %res } @@ -1026,6 +3787,36 @@ define void @fabs_v8f32(ptr %a) { ; CHECK-NEXT: fabs z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fabs_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fabs s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fabs s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: fabs s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: fabs s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: fabs s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call <8 x float> @llvm.fabs.v8f32(<8 x float> %op) store <8 x float> %res, ptr %a @@ -1040,6 +3831,19 @@ define <2 x double> @fabs_v2f64(<2 x double> %op) { ; CHECK-NEXT: fabs z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fabs_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: fabs d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: fabs d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.fabs.v2f64(<2 x double> %op) ret <2 x double> %res } @@ -1053,6 +3857,26 @@ define void @fabs_v4f64(ptr %a) { ; CHECK-NEXT: fabs z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fabs_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fabs d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fabs d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: fabs d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: fabs d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call <4 x double> @llvm.fabs.v4f64(<4 x double> %op) store <4 x double> %res, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll index cbd0ad66fba76..776b6918923ae 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck 
%s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -19,6 +20,28 @@ define <2 x i16> @fcmp_oeq_v2f16(<2 x half> %op1, <2 x half> %op2) { ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oeq_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w9, eq +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %cmp = fcmp oeq <2 x half> %op1, %op2 %sext = sext <2 x i1> %cmp to <2 x i16> ret <2 x i16> %sext @@ -34,6 +57,43 @@ define <4 x i16> @fcmp_oeq_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oeq_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, 
eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %cmp = fcmp oeq <4 x half> %op1, %op2 %sext = sext <4 x i1> %cmp to <4 x i16> ret <4 x i16> %sext @@ -49,6 +109,70 @@ define <8 x i16> @fcmp_oeq_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oeq_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; 
NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %cmp = fcmp oeq <8 x half> %op1, %op2 %sext = sext <8 x i1> %cmp to <8 x i16> ret <8 x i16> %sext @@ -66,6 +190,131 @@ define void @fcmp_oeq_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oeq_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; 
NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; 
NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp oeq <16 x half> %op1, %op2 @@ -84,6 +333,22 @@ define <2 x i32> @fcmp_oeq_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oeq_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s0, s2, [sp, #16] +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #8] +; NONEON-NOSVE-NEXT: fcmp s3, s2 +; NONEON-NOSVE-NEXT: csetm w9, eq +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %cmp = fcmp 
oeq <2 x float> %op1, %op2 %sext = sext <2 x i1> %cmp to <2 x i32> ret <2 x i32> %sext @@ -99,6 +364,28 @@ define <4 x i32> @fcmp_oeq_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oeq_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp s0, s2, [sp, #24] +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #8] +; NONEON-NOSVE-NEXT: fcmp s3, s2 +; NONEON-NOSVE-NEXT: csetm w9, eq +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldp s0, s2, [sp, #16] +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: fcmp s3, s2 +; NONEON-NOSVE-NEXT: csetm w9, eq +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %cmp = fcmp oeq <4 x float> %op1, %op2 %sext = sext <4 x i1> %cmp to <4 x i32> ret <4 x i32> %sext @@ -116,6 +403,47 @@ define void @fcmp_oeq_v8f32(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oeq_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp s0, s2, [sp, #56] +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #40] +; NONEON-NOSVE-NEXT: fcmp s3, s2 +; NONEON-NOSVE-NEXT: csetm w9, eq +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldp s0, s2, [sp, #48] +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #32] +; NONEON-NOSVE-NEXT: 
csetm w8, eq +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s3, s2 +; NONEON-NOSVE-NEXT: csetm w9, eq +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldp s0, s2, [sp, #24] +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #8] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s3, s2 +; NONEON-NOSVE-NEXT: csetm w9, eq +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldp s0, s2, [sp, #16] +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s3, s2 +; NONEON-NOSVE-NEXT: csetm w9, eq +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %cmp = fcmp oeq <8 x float> %op1, %op2 @@ -132,6 +460,17 @@ define <1 x i64> @fcmp_oeq_v1f64(<1 x double> %op1, <1 x double> %op2) { ; CHECK-NEXT: mov z0.d, x8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oeq_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fcmp d0, d1 +; NONEON-NOSVE-NEXT: csetm x8, eq +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %cmp = fcmp oeq <1 x double> %op1, %op2 %sext = sext <1 x i1> %cmp to <1 x i64> ret <1 x i64> %sext @@ -147,6 +486,21 @@ define <2 x i64> @fcmp_oeq_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oeq_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, 
q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp d0, d2, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d1, d3, [sp] +; NONEON-NOSVE-NEXT: fcmp d3, d2 +; NONEON-NOSVE-NEXT: csetm x9, eq +; NONEON-NOSVE-NEXT: fcmp d1, d0 +; NONEON-NOSVE-NEXT: csetm x8, eq +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %cmp = fcmp oeq <2 x double> %op1, %op2 %sext = sext <2 x i1> %cmp to <2 x i64> ret <2 x i64> %sext @@ -164,6 +518,33 @@ define void @fcmp_oeq_v4f64(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oeq_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d0, d2, [sp, #48] +; NONEON-NOSVE-NEXT: ldp d1, d3, [sp, #32] +; NONEON-NOSVE-NEXT: fcmp d3, d2 +; NONEON-NOSVE-NEXT: csetm x9, eq +; NONEON-NOSVE-NEXT: fcmp d1, d0 +; NONEON-NOSVE-NEXT: ldp d0, d2, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d1, d3, [sp] +; NONEON-NOSVE-NEXT: csetm x8, eq +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp d3, d2 +; NONEON-NOSVE-NEXT: csetm x9, eq +; NONEON-NOSVE-NEXT: fcmp d1, d0 +; NONEON-NOSVE-NEXT: csetm x8, eq +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %cmp = fcmp oeq <4 x double> %op1, %op2 @@ -192,6 +573,147 @@ define void @fcmp_ueq_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; 
+; NONEON-NOSVE-LABEL: fcmp_ueq_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, 
s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; 
NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp ueq <16 x half> %op1, %op2 @@ -220,6 +742,147 @@ define void @fcmp_one_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_one_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; 
NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; 
NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; 
NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp one <16 x half> %op1, %op2 @@ -244,6 +907,131 @@ define void @fcmp_une_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_une_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; 
NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; 
NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp une <16 x half> %op1, %op2 @@ -268,6 +1056,131 @@ define void @fcmp_ogt_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; 
CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_ogt_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; 
NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; 
NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp ogt <16 x half> %op1, %op2 @@ -295,6 +1208,131 @@ define void @fcmp_ugt_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: eor z0.d, z2.d, z0.d ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_ugt_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, 
[sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; 
NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp ugt <16 x half> %op1, %op2 @@ -319,6 +1357,131 @@ define void @fcmp_olt_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_olt_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; 
NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; 
NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; 
NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp olt <16 x half> %op1, %op2 @@ -346,6 +1509,131 @@ define void @fcmp_ult_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: eor z0.d, z2.d, z0.d ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_ult_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr 
h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; 
NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp ult <16 x half> %op1, %op2 @@ -370,6 +1658,131 @@ define void @fcmp_oge_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oge_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; 
NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; 
NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; 
NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp oge <16 x half> %op1, %op2 @@ -397,6 +1810,131 @@ define void @fcmp_uge_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: eor z0.d, z2.d, z0.d ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_uge_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: 
ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 
+; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp uge <16 x half> %op1, %op2 @@ -421,6 +1959,131 @@ define void @fcmp_ole_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_ole_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; 
NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; 
NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp ole <16 x half> %op1, %op2 @@ -448,6 +2111,131 @@ 
define void @fcmp_ule_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: eor z0.d, z2.d, z0.d ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_ule_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, 
#34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; 
NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp ule <16 x half> %op1, %op2 @@ -472,6 +2260,131 @@ define void @fcmp_uno_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_uno_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; 
NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; 
NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp uno <16 x half> %op1, %op2 @@ -499,6 +2412,131 @@ define void @fcmp_ord_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: eor z0.d, z2.d, z0.d ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_ord_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; 
NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr 
h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; 
NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp ord <16 x half> %op1, %op2 @@ -523,6 +2561,131 @@ define void @fcmp_eq_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_eq_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; 
NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; 
NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp fast oeq <16 x half> %op1, %op2 @@ -547,6 +2710,131 @@ define void @fcmp_ne_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_ne_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] 
+; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; 
NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: 
fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp fast one <16 x half> %op1, %op2 @@ -571,6 +2859,131 @@ define void @fcmp_gt_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_gt_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: 
strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt 
s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp fast ogt <16 x half> %op1, %op2 @@ -595,6 +3008,131 @@ define void @fcmp_lt_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_lt_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt 
s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; 
NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x 
half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp fast olt <16 x half> %op1, %op2 @@ -619,6 +3157,131 @@ define void @fcmp_ge_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_ge_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; 
NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; 
NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp fast oge <16 x half> %op1, %op2 @@ -643,6 +3306,131 @@ define void @fcmp_le_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_le_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] 
+; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; 
NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp fast ole <16 x half> %op1, %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll index 57d072a7bcd68..2c08977320e84 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -17,6 +18,32 @@ define void @fp_convert_combine_crash(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fp_convert_combine_crash: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0, #3 +; NONEON-NOSVE-NEXT: ldp s0, s2, [sp, #16] +; NONEON-NOSVE-NEXT: fcvtzs w9, s1, #3 +; NONEON-NOSVE-NEXT: fcvtzs w10, s2, #3 +; NONEON-NOSVE-NEXT: fcvtzs w11, s0, #3 +; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s0, s3, [sp] +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #56] +; NONEON-NOSVE-NEXT: fcvtzs w12, s1, #3 +; NONEON-NOSVE-NEXT: fcvtzs w8, s2, #3 +; NONEON-NOSVE-NEXT: stp w11, w10, [sp, #48] +; NONEON-NOSVE-NEXT: fcvtzs w9, s3, #3 +; NONEON-NOSVE-NEXT: fcvtzs w10, s0, #3 +; NONEON-NOSVE-NEXT: stp w8, w12, [sp, #40] +; NONEON-NOSVE-NEXT: stp w10, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %f = load <8 x float>, ptr %a %mul.i = fmul <8 x float> %f, diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll index 6a2dc3c718252..9878910763a75 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -17,6 +18,20 @@ define void @fcvt_v2f16_to_v2f32(<2 x half> %a, ptr %b) { ; CHECK-NEXT: fcvt z0.s, p0/m, z0.h ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v2f16_to_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fpext <2 x half> %a to <2 x float> store <2 x float> %res, ptr %b ret void @@ -31,6 +46,26 @@ define void @fcvt_v4f16_to_v4f32(<4 x half> %a, ptr %b) { ; CHECK-NEXT: fcvt z0.s, p0/m, z0.h ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v4f16_to_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h0 
+; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fpext <4 x half> %a to <4 x float> store <4 x float> %res, ptr %b ret void @@ -48,6 +83,37 @@ define void @fcvt_v8f16_to_v8f32(<8 x half> %a, ptr %b) { ; CHECK-NEXT: fcvt z0.s, p0/m, z0.h ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v8f16_to_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %res = fpext <8 x half> %a to <8 x float> store <8 x float> %res, ptr %b ret void @@ -72,6 +138,61 @@ define void @fcvt_v16f16_to_v16f32(<16 x half> %a, ptr %b) { ; CHECK-NEXT: stp q3, q0, [x0] ; CHECK-NEXT: stp q2, q1, [x0, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v16f16_to_v16f32: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-128]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #88] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #80] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #72] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #120] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #112] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #104] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q2, q3, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: add 
sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %res = fpext <16 x half> %a to <16 x float> store <16 x float> %res, ptr %b ret void @@ -90,6 +211,24 @@ define void @fcvt_v2f16_v2f32(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z0.s, p0/m, z0.h ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v2f16_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: str w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: str d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x half>, ptr %a %res = fpext <2 x half> %op1 to <2 x float> store <2 x float> %res, ptr %b @@ -104,6 +243,27 @@ define void @fcvt_v4f16_v4f32(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z0.s, p0/m, z0.h ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v4f16_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x half>, ptr %a %res = fpext <4 x half> %op1 to <4 x float> store 
<4 x float> %res, ptr %b @@ -121,6 +281,38 @@ define void @fcvt_v8f16_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z1.s, p0/m, z1.h ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v8f16_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %res = fpext <8 x half> %op1 to <8 x float> store <8 x float> %res, ptr %b @@ -145,6 +337,62 @@ define void @fcvt_v16f16_v16f32(ptr %a, ptr %b) { ; CHECK-NEXT: stp q0, q1, [x1, #32] ; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v16f16_v16f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-128]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #88] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #80] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #72] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #120] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #112] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #104] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %op1 = 
load <16 x half>, ptr %a %res = fpext <16 x half> %op1 to <16 x float> store <16 x float> %res, ptr %b @@ -162,6 +410,18 @@ define void @fcvt_v1f16_v1f64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt d0, h0 ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v1f16_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [x0] +; NONEON-NOSVE-NEXT: fcvt d0, h0 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <1 x half>, ptr %a %res = fpext <1 x half> %op1 to <1 x double> store <1 x double> %res, ptr %b @@ -176,6 +436,30 @@ define void @fcvt_v2f16_v2f64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z0.d, p0/m, z0.h ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v2f16_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: str w8, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: str d0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x half>, ptr %a %res = fpext <2 x half> %op1 to <2 x double> store <2 x double> %res, ptr %b @@ -193,6 +477,39 @@ define void @fcvt_v4f16_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z1.d, p0/m, z1.h ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v4f16_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #32] +; 
NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x half>, ptr %a %res = fpext <4 x half> %op1 to <4 x double> store <4 x double> %res, ptr %b @@ -217,6 +534,65 @@ define void @fcvt_v8f16_v8f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q0, q1, [x1, #32] ; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v8f16_v8f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #160 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #92] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #88] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #144] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #84] +; NONEON-NOSVE-NEXT: fcvt 
d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #80] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #128] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #76] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #72] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #68] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #64] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %res = fpext <8 x half> %op1 to <8 x double> store <8 x double> %res, ptr %b @@ -258,6 +634,119 @@ define void @fcvt_v16f16_v16f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q4, q0, [x1, #32] ; CHECK-NEXT: stp q1, q2, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v16f16_v16f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #336 +; NONEON-NOSVE-NEXT: str x29, [sp, #320] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 336 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ldr x29, [sp, #320] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: stp q0, q1, [sp] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #40] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #96] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #88] +; NONEON-NOSVE-NEXT: 
ldr h0, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #80] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #72] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #66] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #64] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #128] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #70] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #68] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #120] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #112] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: fcvt s1, h0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #104] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #88] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #152] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #72] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #136] +; NONEON-NOSVE-NEXT: ldp d2, d1, [sp, #120] +; NONEON-NOSVE-NEXT: str d1, [sp, #328] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #104] +; NONEON-NOSVE-NEXT: str d0, [sp, #168] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #164] +; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #176] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #160] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #240] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #156] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #152] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #224] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #148] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; 
NONEON-NOSVE-NEXT: ldr s0, [sp, #144] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #140] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #136] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #192] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #332] +; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #192] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #328] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #304] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #188] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #184] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #288] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #180] +; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #288] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #176] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #272] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #172] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #168] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #256] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #224] +; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #256] +; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32] +; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #336 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fpext <16 x half> %op1 to <16 x double> store <16 x double> %res, ptr %b @@ -275,6 +764,13 @@ define void @fcvt_v1f32_v1f64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt d0, s0 ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v1f32_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr s0, [x0] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <1 x float>, ptr %a %res = 
fpext <1 x float> %op1 to <1 x double> store <1 x double> %res, ptr %b @@ -289,6 +785,22 @@ define void @fcvt_v2f32_v2f64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z0.d, p0/m, z0.s ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v2f32_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x float>, ptr %a %res = fpext <2 x float> %op1 to <2 x double> store <2 x double> %res, ptr %b @@ -306,6 +818,28 @@ define void @fcvt_v4f32_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z1.d, p0/m, z1.s ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v4f32_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x float>, ptr %a %res = fpext <4 x float> %op1 to <4 x double> store <4 x double> %res, ptr %b @@ -330,6 +864,42 @@ define void @fcvt_v8f32_v8f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q0, q1, [x1, #32] ; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v8f32_v8f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-128]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #60] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #52] +; NONEON-NOSVE-NEXT: fcvt d1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt d0, s0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %res = fpext <8 x float> %op1 to <8 x double> store <8 x double> %res, ptr %b @@ -348,6 +918,25 @@ define void @fcvt_v2f32_v2f16(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z0.h, p0/m, z0.s ; CHECK-NEXT: st1h { z0.s }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v2f32_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; 
NONEON-NOSVE-NEXT: str d0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: str w8, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x float>, ptr %a %res = fptrunc <2 x float> %op1 to <2 x half> store <2 x half> %res, ptr %b @@ -362,6 +951,28 @@ define void @fcvt_v4f32_v4f16(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z0.h, p0/m, z0.s ; CHECK-NEXT: st1h { z0.s }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v4f32_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x float>, ptr %a %res = fptrunc <4 x float> %op1 to <4 x half> store <4 x half> %res, ptr %b @@ -379,6 +990,40 @@ define void @fcvt_v8f32_v8f16(ptr %a, ptr %b) { ; CHECK-NEXT: st1h { z0.s }, p0, [x1, x8, lsl #1] ; CHECK-NEXT: st1h { z1.s }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v8f32_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %res = fptrunc <8 x float> %op1 to <8 x half> store <8 x half> %res, ptr %b @@ -397,6 +1042,13 @@ define void @fcvt_v1f64_v1f16(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z0.h, p0/m, z0.d ; CHECK-NEXT: st1h { z0.d }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v1f64_v1f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: fcvt h0, d0 +; NONEON-NOSVE-NEXT: str h0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <1 x double>, ptr %a %res = fptrunc <1 x double> %op1 to <1 x half> store <1 x half> %res, ptr %b @@ -411,6 +1063,24 @@ define void @fcvt_v2f64_v2f16(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z0.h, p0/m, z0.d ; CHECK-NEXT: st1h { z0.d }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v2f64_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt h0, d0 +; NONEON-NOSVE-NEXT: str h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: fcvt h0, d0 +; NONEON-NOSVE-NEXT: str h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: str d0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: str w8, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x double>, ptr %a %res = fptrunc <2 x double> %op1 to <2 x half> store <2 x half> %res, ptr %b @@ -428,6 +1098,28 @@ define void @fcvt_v4f64_v4f16(ptr %a, ptr %b) { ; CHECK-NEXT: st1h { z0.d }, p0, [x1, x8, lsl #1] ; CHECK-NEXT: st1h { z1.d }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v4f64_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt h0, d0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt h0, d0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt h0, d0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: fcvt h0, d0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %res = fptrunc <4 x double> %op1 to <4 x half> store <4 x half> %res, ptr %b @@ -446,6 +1138,12 @@ define void @fcvt_v1f64_v1f32(<1 x double> %op1, ptr %b) { ; CHECK-NEXT: fcvt z0.s, p0/m, z0.d ; CHECK-NEXT: st1w { z0.d }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v1f64_v1f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvt s0, d0 +; NONEON-NOSVE-NEXT: str s0, [x0] +; 
NONEON-NOSVE-NEXT: ret %res = fptrunc <1 x double> %op1 to <1 x float> store <1 x float> %res, ptr %b ret void @@ -459,6 +1157,20 @@ define void @fcvt_v2f64_v2f32(<2 x double> %op1, ptr %b) { ; CHECK-NEXT: fcvt z0.s, p0/m, z0.d ; CHECK-NEXT: st1w { z0.d }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v2f64_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, d0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fptrunc <2 x double> %op1 to <2 x float> store <2 x float> %res, ptr %b ret void @@ -475,6 +1187,26 @@ define void @fcvt_v4f64_v4f32(ptr %a, ptr %b) { ; CHECK-NEXT: st1w { z0.d }, p0, [x1, x8, lsl #2] ; CHECK-NEXT: st1w { z1.d }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v4f64_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, d0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, d0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %res = fptrunc <4 x double> %op1 to <4 x float> store <4 x float> %res, ptr %b diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll index 153a04f486571..680cb4fb0a791 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -17,6 +18,63 @@ define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3) ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: str d2, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h6, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h7, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: ldr h4, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: ldr h5, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: ldr h3, [sp] +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fmul s1, s3, s1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: fmul s2, s7, s6 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s2, s0 +; NONEON-NOSVE-NEXT: fmul s2, s5, s4 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s2, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %mul = fmul contract <4 x half> %op1, %op2 %res = fadd 
contract <4 x half> %mul, %op3 ret <4 x half> %res @@ -32,6 +90,111 @@ define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3) ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: str q2, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h22, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h23, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s3, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: ldr h20, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s22, h22 +; NONEON-NOSVE-NEXT: fcvt s23, h23 +; NONEON-NOSVE-NEXT: ldr h21, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s20, h20 +; NONEON-NOSVE-NEXT: ldr h18, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h19, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h16, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h17, [sp, #6] +; NONEON-NOSVE-NEXT: fmul s5, s1, s3 +; NONEON-NOSVE-NEXT: fcvt s21, h21 +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: fcvt s19, h19 +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: fcvt s17, h17 +; NONEON-NOSVE-NEXT: ldr h6, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h7, [sp, #4] +; NONEON-NOSVE-NEXT: ldr h3, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h4, [sp, #2] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt h5, s5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fmul s3, s4, s3 +; NONEON-NOSVE-NEXT: fmul s0, s1, s0 +; NONEON-NOSVE-NEXT: fadd s2, s5, s2 +; NONEON-NOSVE-NEXT: fmul s5, s23, s22 +; 
NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt h5, s5 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: str h2, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s2, s5, s2 +; NONEON-NOSVE-NEXT: fmul s5, s21, s20 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt h5, s5 +; NONEON-NOSVE-NEXT: str h2, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s2, s5, s2 +; NONEON-NOSVE-NEXT: fmul s5, s19, s18 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt h5, s5 +; NONEON-NOSVE-NEXT: str h2, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s2, s5, s2 +; NONEON-NOSVE-NEXT: fmul s5, s17, s16 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt h5, s5 +; NONEON-NOSVE-NEXT: str h2, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s2, s5, s2 +; NONEON-NOSVE-NEXT: fmul s5, s7, s6 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt h5, s5 +; NONEON-NOSVE-NEXT: str h2, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s2, s5, s2 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: str h2, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s2, s3, s2 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: str h1, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, 
[sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %mul = fmul contract <8 x half> %op1, %op2 %res = fadd contract <8 x half> %mul, %op3 ret <8 x half> %res @@ -49,6 +212,232 @@ define void @fma_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: fmla z1.h, p0/m, z3.h, z4.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #208 +; NONEON-NOSVE-NEXT: stp d15, d14, [sp, #144] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp d13, d12, [sp, #160] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp d11, d10, [sp, #176] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp d9, d8, [sp, #192] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 208 +; NONEON-NOSVE-NEXT: .cfi_offset b8, -8 +; NONEON-NOSVE-NEXT: .cfi_offset b9, -16 +; NONEON-NOSVE-NEXT: .cfi_offset b10, -24 +; NONEON-NOSVE-NEXT: .cfi_offset b11, -32 +; NONEON-NOSVE-NEXT: .cfi_offset b12, -40 +; NONEON-NOSVE-NEXT: .cfi_offset b13, -48 +; NONEON-NOSVE-NEXT: .cfi_offset b14, -56 +; NONEON-NOSVE-NEXT: .cfi_offset b15, -64 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: ldp q18, q19, [x2] +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #16] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr h24, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h25, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #94] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #78] +; NONEON-NOSVE-NEXT: str q19, [sp, #96] +; NONEON-NOSVE-NEXT: str q18, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h18, [sp, #110] +; NONEON-NOSVE-NEXT: ldr h15, [sp, #92] +; NONEON-NOSVE-NEXT: fcvt s20, h0 +; NONEON-NOSVE-NEXT: fcvt s21, h1 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: ldr h13, [sp, #90] +; NONEON-NOSVE-NEXT: ldr h14, [sp, #74] +; NONEON-NOSVE-NEXT: str h0, [sp, #14] // 2-byte Folded Spill +; 
NONEON-NOSVE-NEXT: ldr h0, [sp, #76] +; NONEON-NOSVE-NEXT: ldr h11, [sp, #88] +; NONEON-NOSVE-NEXT: ldr h12, [sp, #72] +; NONEON-NOSVE-NEXT: ldr h9, [sp, #86] +; NONEON-NOSVE-NEXT: ldr h10, [sp, #70] +; NONEON-NOSVE-NEXT: fmul s30, s21, s20 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: ldr h31, [sp, #84] +; NONEON-NOSVE-NEXT: ldr h8, [sp, #68] +; NONEON-NOSVE-NEXT: ldr h28, [sp, #82] +; NONEON-NOSVE-NEXT: ldr h29, [sp, #66] +; NONEON-NOSVE-NEXT: ldr h26, [sp, #80] +; NONEON-NOSVE-NEXT: ldr h27, [sp, #64] +; NONEON-NOSVE-NEXT: ldr h22, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h23, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h20, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h21, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt h19, s30 +; NONEON-NOSVE-NEXT: fcvt s30, h15 +; NONEON-NOSVE-NEXT: ldr h16, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h17, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h6, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h7, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: ldr h4, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h5, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s17, h17 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcvt s19, h19 +; NONEON-NOSVE-NEXT: fmul s0, s0, s30 +; NONEON-NOSVE-NEXT: fcvt s30, h14 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h3, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #16] +; NONEON-NOSVE-NEXT: fmul s16, s17, s16 +; NONEON-NOSVE-NEXT: fmul s6, s7, s6 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s18, s19, s18 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s19, h13 +; NONEON-NOSVE-NEXT: fmul s4, s5, s4 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: ldp d15, d14, [sp, #144] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: fcvt h16, s16 +; NONEON-NOSVE-NEXT: fcvt h6, s6 +; NONEON-NOSVE-NEXT: fcvt h18, s18 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; 
NONEON-NOSVE-NEXT: fcvt h4, s4 +; NONEON-NOSVE-NEXT: fmul s2, s3, s2 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #14] // 2-byte Folded Reload +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: str h18, [sp, #142] +; NONEON-NOSVE-NEXT: ldr h18, [sp, #108] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: fmul s1, s1, s3 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s0, s0, s18 +; NONEON-NOSVE-NEXT: fmul s18, s30, s19 +; NONEON-NOSVE-NEXT: fcvt s19, h11 +; NONEON-NOSVE-NEXT: fcvt s30, h12 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: ldp d13, d12, [sp, #160] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h18, s18 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: str h0, [sp, #140] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #106] +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s18, s0 +; NONEON-NOSVE-NEXT: fmul s18, s30, s19 +; NONEON-NOSVE-NEXT: fcvt s19, h9 +; NONEON-NOSVE-NEXT: fcvt s30, h10 +; NONEON-NOSVE-NEXT: ldp d11, d10, [sp, #176] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h18, s18 +; NONEON-NOSVE-NEXT: str h0, [sp, #138] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #104] +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s18, s0 +; NONEON-NOSVE-NEXT: fmul s18, s30, s19 +; NONEON-NOSVE-NEXT: fcvt s19, h31 +; NONEON-NOSVE-NEXT: fcvt s30, h8 +; NONEON-NOSVE-NEXT: ldp d9, d8, [sp, #192] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h18, s18 +; NONEON-NOSVE-NEXT: str h0, [sp, #136] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #102] +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s18, s0 +; NONEON-NOSVE-NEXT: fmul s18, s30, s19 +; 
NONEON-NOSVE-NEXT: fcvt s19, h28 +; NONEON-NOSVE-NEXT: fcvt s28, h29 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h18, s18 +; NONEON-NOSVE-NEXT: str h0, [sp, #134] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #100] +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s18, s0 +; NONEON-NOSVE-NEXT: fmul s18, s28, s19 +; NONEON-NOSVE-NEXT: fcvt s19, h26 +; NONEON-NOSVE-NEXT: fcvt s26, h27 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h18, s18 +; NONEON-NOSVE-NEXT: str h0, [sp, #132] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #98] +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s18, s0 +; NONEON-NOSVE-NEXT: fmul s18, s26, s19 +; NONEON-NOSVE-NEXT: fcvt s19, h24 +; NONEON-NOSVE-NEXT: fcvt s24, h25 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h18, s18 +; NONEON-NOSVE-NEXT: str h0, [sp, #130] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #96] +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s18, s0 +; NONEON-NOSVE-NEXT: fmul s18, s24, s19 +; NONEON-NOSVE-NEXT: fcvt s19, h22 +; NONEON-NOSVE-NEXT: fcvt s22, h23 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h18, s18 +; NONEON-NOSVE-NEXT: str h0, [sp, #128] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s18, s0 +; NONEON-NOSVE-NEXT: fmul s18, s22, s19 +; NONEON-NOSVE-NEXT: fcvt s19, h20 +; NONEON-NOSVE-NEXT: fcvt s20, h21 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h18, s18 +; NONEON-NOSVE-NEXT: str h0, [sp, #126] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s18, s0 +; NONEON-NOSVE-NEXT: fmul s18, s20, s19 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h18, s18 +; NONEON-NOSVE-NEXT: str h0, [sp, #124] +; NONEON-NOSVE-NEXT: ldr 
h0, [sp, #58] +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s18, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #122] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s16, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #120] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s6, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #118] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s4, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #116] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s2, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #114] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #112] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #112] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #208 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %op3 = load <16 x half>, ptr %c @@ -68,6 +457,23 @@ define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %o ; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8] +; NONEON-NOSVE-NEXT: str d0, [sp] +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s2, s4, [sp] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0 +; 
NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %mul = fmul contract <2 x float> %op1, %op2 %res = fadd contract <2 x float> %mul, %op3 ret <2 x float> %res @@ -83,6 +489,30 @@ define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %o ; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: stp q1, q2, [sp, #16] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #24] +; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #44] +; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #40] +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #16] +; NONEON-NOSVE-NEXT: ldp s2, s4, [sp] +; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #56] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #36] +; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #32] +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %mul = fmul contract <4 x float> %op1, %op2 %res = fadd contract <4 x float> %mul, %op3 ret <4 x float> %res @@ -100,6 +530,49 @@ define void @fma_v8f32(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: fmla z1.s, p0/m, z3.s, z4.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #128 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x2] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q4, 
q5, [x0] +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q4, q2, [sp] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #92] +; NONEON-NOSVE-NEXT: stp q1, q5, [sp, #32] +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #72] +; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #56] +; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #88] +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #48] +; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #120] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #84] +; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #80] +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #24] +; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8] +; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #112] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #44] +; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #40] +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #16] +; NONEON-NOSVE-NEXT: ldp s2, s4, [sp] +; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #104] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #36] +; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #32] +; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %op3 = load <8 x float>, ptr %c @@ -114,6 +587,16 @@ define <1 x double> @fma_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x double ; CHECK: // %bb.0: ; CHECK-NEXT: fmadd d0, d0, d1, d2 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmadd d0, d0, d1, d2 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr 
d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %mul = fmul contract <1 x double> %op1, %op2 %res = fadd contract <1 x double> %mul, %op3 ret <1 x double> %res @@ -129,6 +612,23 @@ define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double ; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: stp q1, q2, [sp, #16] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldp d1, d3, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d2, d4, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: fmadd d5, d4, d3, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #32] +; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d5, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %mul = fmul contract <2 x double> %op1, %op2 %res = fadd contract <2 x double> %mul, %op3 ret <2 x double> %res @@ -146,6 +646,35 @@ define void @fma_v4f64(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: fmla z1.d, p0/m, z3.d, z4.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #128 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x2] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q4, q5, [x0] +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q4, q2, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #88] +; NONEON-NOSVE-NEXT: stp q1, q5, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp d2, d4, [sp, #48] +; NONEON-NOSVE-NEXT: fmadd d5, d4, d3, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #80] +; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0 +; NONEON-NOSVE-NEXT: ldp d1, d3, [sp, #16] +; 
NONEON-NOSVE-NEXT: ldp d2, d4, [sp] +; NONEON-NOSVE-NEXT: stp d0, d5, [sp, #112] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: fmadd d5, d4, d3, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #32] +; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d5, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %op3 = load <4 x double>, ptr %c diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll index 6945a6102c055..775cac272cde9 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -17,6 +18,43 @@ define <4 x half> @fmaxnm_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxnm_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; 
NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %op1, <4 x half> %op2) ret <4 x half> %res } @@ -30,6 +68,70 @@ define <8 x half> @fmaxnm_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxnm_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; 
NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %op1, <8 x half> %op2) ret <8 x half> %res } @@ -45,6 +147,131 @@ define void @fmaxnm_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fmaxnm z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxnm_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #94] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #92] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #90] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #88] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; 
NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #86] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #84] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #82] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #80] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #78] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #76] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #74] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: 
fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #72] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #70] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #68] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #66] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %res = call <16 x half> @llvm.maxnum.v16f16(<16 x half> %op1, <16 x half> %op2) @@ -61,6 +288,21 @@ define <2 x float> @fmaxnm_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxnm_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fmaxnm s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; 
NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %op1, <2 x float> %op2) ret <2 x float> %res } @@ -74,6 +316,26 @@ define <4 x float> @fmaxnm_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxnm_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fmaxnm s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fmaxnm s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %op1, <4 x float> %op2) ret <4 x float> %res } @@ -89,6 +351,43 @@ define void @fmaxnm_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fmaxnm z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxnm_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #60] +; NONEON-NOSVE-NEXT: fmaxnm s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #56] +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #32] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #88] +; 
NONEON-NOSVE-NEXT: ldr s0, [sp, #52] +; NONEON-NOSVE-NEXT: fmaxnm s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #48] +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #80] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fmaxnm s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #72] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fmaxnm s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %op1, <8 x float> %op2) @@ -101,6 +400,16 @@ define <1 x double> @fmaxnm_v1f64(<1 x double> %op1, <1 x double> %op2) { ; CHECK: // %bb.0: ; CHECK-NEXT: fmaxnm d0, d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxnm_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmaxnm d0, d0, d1 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.maxnum.v1f64(<1 x double> %op1, <1 x double> %op2) ret <1 x double> %res } @@ -114,6 +423,20 @@ define <2 x double> @fmaxnm_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxnm_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fmaxnm d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fmaxnm d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %op1, <2 x double> %op2) ret <2 x double> %res } @@ -129,6 +452,31 @@ define void @fmaxnm_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fmaxnm z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxnm_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #56] +; NONEON-NOSVE-NEXT: fmaxnm d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #48] +; NONEON-NOSVE-NEXT: fmaxnm d0, d1, d0 +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp] +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #80] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fmaxnm d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fmaxnm d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %res = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %op1, <4 x double> %op2) @@ -149,6 +497,43 @@ define <4 x half> @fminnm_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminnm_v4f16: +; 
NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.minnum.v4f16(<4 x half> %op1, <4 x half> %op2) ret <4 x half> %res } @@ -162,6 +547,70 @@ define <8 x half> @fminnm_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminnm_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; 
NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.minnum.v8f16(<8 x half> %op1, <8 x half> %op2) ret <8 x half> %res } @@ -177,6 +626,131 @@ define void @fminnm_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fminnm z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminnm_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #94] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #92] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #90] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #88] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; 
NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #86] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #84] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #82] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #80] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #78] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #76] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #74] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: 
fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #72] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #70] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #68] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #66] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %res = call <16 x half> @llvm.minnum.v16f16(<16 x half> %op1, <16 x half> %op2) @@ -193,6 +767,21 @@ define <2 x float> @fminnm_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminnm_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fminnm s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; 
NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.minnum.v2f32(<2 x float> %op1, <2 x float> %op2) ret <2 x float> %res } @@ -206,6 +795,26 @@ define <4 x float> @fminnm_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminnm_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fminnm s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fminnm s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.minnum.v4f32(<4 x float> %op1, <4 x float> %op2) ret <4 x float> %res } @@ -221,6 +830,43 @@ define void @fminnm_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fminnm z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminnm_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #60] +; NONEON-NOSVE-NEXT: fminnm s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #56] +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #32] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #88] +; 
NONEON-NOSVE-NEXT: ldr s0, [sp, #52] +; NONEON-NOSVE-NEXT: fminnm s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #48] +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #80] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fminnm s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #72] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fminnm s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = call <8 x float> @llvm.minnum.v8f32(<8 x float> %op1, <8 x float> %op2) @@ -233,6 +879,16 @@ define <1 x double> @fminnm_v1f64(<1 x double> %op1, <1 x double> %op2) { ; CHECK: // %bb.0: ; CHECK-NEXT: fminnm d0, d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminnm_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fminnm d0, d0, d1 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.minnum.v1f64(<1 x double> %op1, <1 x double> %op2) ret <1 x double> %res } @@ -246,6 +902,20 @@ define <2 x double> @fminnm_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminnm_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fminnm d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fminnm d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.minnum.v2f64(<2 x double> %op1, <2 x double> %op2) ret <2 x double> %res } @@ -261,6 +931,31 @@ define void @fminnm_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fminnm z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminnm_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #56] +; NONEON-NOSVE-NEXT: fminnm d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #48] +; NONEON-NOSVE-NEXT: fminnm d0, d1, d0 +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp] +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #80] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fminnm d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fminnm d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %res = call <4 x double> @llvm.minnum.v4f64(<4 x double> %op1, <4 x double> %op2) @@ -281,6 +976,43 @@ define <4 x half> @fmax_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmax_v4f16: +; 
NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.maximum.v4f16(<4 x half> %op1, <4 x half> %op2) ret <4 x half> %res } @@ -294,6 +1026,70 @@ define <8 x half> @fmax_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmax_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt 
s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.maximum.v8f16(<8 x half> %op1, <8 x half> %op2) ret <8 x half> %res } @@ -309,6 +1105,131 @@ define void @fmax_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fmax z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmax_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #94] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #92] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #90] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #88] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: 
fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #86] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #84] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #82] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #80] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #78] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #76] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #74] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #72] +; 
NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #70] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #68] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #66] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %res = call <16 x half> @llvm.maximum.v16f16(<16 x half> %op1, <16 x half> %op2) @@ -325,6 +1246,21 @@ define <2 x float> @fmax_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmax_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fmax s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = 
call <2 x float> @llvm.maximum.v2f32(<2 x float> %op1, <2 x float> %op2) ret <2 x float> %res } @@ -338,6 +1274,26 @@ define <4 x float> @fmax_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmax_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fmax s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fmax s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.maximum.v4f32(<4 x float> %op1, <4 x float> %op2) ret <4 x float> %res } @@ -353,6 +1309,43 @@ define void @fmax_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fmax z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmax_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #60] +; NONEON-NOSVE-NEXT: fmax s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #56] +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #32] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #88] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #52] +; NONEON-NOSVE-NEXT: fmax s3, s2, s0 +; NONEON-NOSVE-NEXT: 
ldr s0, [sp, #48] +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #80] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fmax s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #72] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fmax s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = call <8 x float> @llvm.maximum.v8f32(<8 x float> %op1, <8 x float> %op2) @@ -365,6 +1358,16 @@ define <1 x double> @fmax_v1f64(<1 x double> %op1, <1 x double> %op2) { ; CHECK: // %bb.0: ; CHECK-NEXT: fmax d0, d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmax_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmax d0, d0, d1 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.maximum.v1f64(<1 x double> %op1, <1 x double> %op2) ret <1 x double> %res } @@ -378,6 +1381,20 @@ define <2 x double> @fmax_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmax_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fmax d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fmax d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.maximum.v2f64(<2 x double> %op1, <2 x double> %op2) ret <2 x double> %res } @@ -393,6 +1410,31 @@ define void @fmax_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fmax z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmax_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #56] +; NONEON-NOSVE-NEXT: fmax d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #48] +; NONEON-NOSVE-NEXT: fmax d0, d1, d0 +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp] +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #80] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fmax d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fmax d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %res = call <4 x double> @llvm.maximum.v4f64(<4 x double> %op1, <4 x double> %op2) @@ -413,6 +1455,43 @@ define <4 x half> @fmin_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmin_v4f16: +; NONEON-NOSVE: // 
%bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.minimum.v4f16(<4 x half> %op1, <4 x half> %op2) ret <4 x half> %res } @@ -426,6 +1505,70 @@ define <8 x half> @fmin_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmin_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt 
s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.minimum.v8f16(<8 x half> %op1, <8 x half> %op2) ret <8 x half> %res } @@ -441,6 +1584,131 @@ define void @fmin_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fmin z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmin_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #94] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #92] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #90] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #88] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: 
fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #86] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #84] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #82] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #80] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #78] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #76] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #74] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #72] +; 
NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #70] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #68] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #66] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %res = call <16 x half> @llvm.minimum.v16f16(<16 x half> %op1, <16 x half> %op2) @@ -457,6 +1725,21 @@ define <2 x float> @fmin_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmin_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fmin s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = 
call <2 x float> @llvm.minimum.v2f32(<2 x float> %op1, <2 x float> %op2) ret <2 x float> %res } @@ -470,6 +1753,26 @@ define <4 x float> @fmin_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmin_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fmin s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fmin s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.minimum.v4f32(<4 x float> %op1, <4 x float> %op2) ret <4 x float> %res } @@ -485,6 +1788,43 @@ define void @fmin_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fmin z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmin_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #60] +; NONEON-NOSVE-NEXT: fmin s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #56] +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #32] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #88] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #52] +; NONEON-NOSVE-NEXT: fmin s3, s2, s0 +; NONEON-NOSVE-NEXT: 
ldr s0, [sp, #48] +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #80] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fmin s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #72] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fmin s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = call <8 x float> @llvm.minimum.v8f32(<8 x float> %op1, <8 x float> %op2) @@ -497,6 +1837,16 @@ define <1 x double> @fmin_v1f64(<1 x double> %op1, <1 x double> %op2) { ; CHECK: // %bb.0: ; CHECK-NEXT: fmin d0, d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmin_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmin d0, d0, d1 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.minimum.v1f64(<1 x double> %op1, <1 x double> %op2) ret <1 x double> %res } @@ -510,6 +1860,20 @@ define <2 x double> @fmin_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmin_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fmin d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fmin d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.minimum.v2f64(<2 x double> %op1, <2 x double> %op2) ret <2 x double> %res } @@ -525,6 +1889,31 @@ define void @fmin_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fmin z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmin_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #56] +; NONEON-NOSVE-NEXT: fmin d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #48] +; NONEON-NOSVE-NEXT: fmin d0, d1, d0 +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp] +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #80] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fmin d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fmin d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %res = call <4 x double> @llvm.minimum.v4f64(<4 x double> %op1, <4 x double> %op2) diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll index e239ff5e35fd3..f081d4ac65b27 100644 --- 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sme-fa64 -force-streaming-compatible < %s | FileCheck %s -check-prefix=FA64 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s -check-prefix=NO-FA64 +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -26,6 +27,34 @@ define half @fadda_v4f16(half %start, <4 x half> %a) { ; NO-FA64-NEXT: fadd h0, h0, h2 ; NO-FA64-NEXT: fadd h0, h0, h1 ; NO-FA64-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadda_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a) ret half %res } diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll index 78ae7bb6cf30a..4eaaee7ce5055 100644 --- 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -19,6 +20,34 @@ define half @fadda_v4f16(half %start, <4 x half> %a) { ; CHECK-NEXT: fadd h0, h0, h2 ; CHECK-NEXT: fadd h0, h0, h1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadda_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a) ret half %res } @@ -43,6 +72,53 @@ define half @fadda_v8f16(half %start, <8 x half> %a) { ; CHECK-NEXT: fadd h0, h0, h2 ; CHECK-NEXT: fadd h0, h0, h1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadda_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q1, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a) ret half %res } @@ -83,6 +159,97 @@ define half @fadda_v16f16(half %start, ptr %a) { ; CHECK-NEXT: fadd h0, h0, h2 ; CHECK-NEXT: fadd h0, h0, h1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadda_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: fcvt s0, 
h0 +; NONEON-NOSVE-NEXT: str q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: 
fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op) ret half %res @@ -96,6 +263,17 @@ define float @fadda_v2f32(float %start, <2 x float> %a) { ; CHECK-NEXT: mov z1.s, z1.s[1] ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadda_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8] +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a) ret float %res } @@ -112,6 +290,19 @@ define float @fadda_v4f32(float %start, <4 x float> %a) { ; CHECK-NEXT: fadd s0, s0, s2 ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadda_v4f32: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: str q1, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp s2, s1, [sp] +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8] +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a) ret float %res } @@ -136,6 +327,29 @@ define float @fadda_v8f32(float %start, ptr %a) { ; CHECK-NEXT: fadd s0, s0, s2 ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadda_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: ldp s2, s1, [sp] +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8] +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #16] +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #24] +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op) ret float %res @@ -146,6 +360,11 @@ define double @fadda_v1f64(double %start, <1 x double> %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fadd d0, d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadda_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fadd d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a) ret double %res } @@ -158,6 +377,15 @@ define 
double @fadda_v2f64(double %start, <2 x double> %a) { ; CHECK-NEXT: mov z1.d, z1.d[1] ; CHECK-NEXT: fadd d0, d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadda_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q1, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp d2, d1, [sp], #16 +; NONEON-NOSVE-NEXT: fadd d0, d0, d2 +; NONEON-NOSVE-NEXT: fadd d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a) ret double %res } @@ -174,6 +402,23 @@ define double @fadda_v4f64(double %start, ptr %a) { ; CHECK-NEXT: mov z1.d, z1.d[1] ; CHECK-NEXT: fadd d0, d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadda_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: ldp d2, d1, [sp] +; NONEON-NOSVE-NEXT: fadd d0, d0, d2 +; NONEON-NOSVE-NEXT: fadd d0, d0, d1 +; NONEON-NOSVE-NEXT: ldp d2, d1, [sp, #16] +; NONEON-NOSVE-NEXT: fadd d0, d0, d2 +; NONEON-NOSVE-NEXT: fadd d0, d0, d1 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op) ret double %res @@ -191,6 +436,34 @@ define half @faddv_v4f16(half %start, <4 x half> %a) { ; CHECK-NEXT: faddv h1, p0, z1.h ; CHECK-NEXT: fadd h0, h0, h1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: faddv_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s1, s1, s2 +; 
NONEON-NOSVE-NEXT: ldr h2, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s1, s1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s1, s1, s2 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call fast half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a) ret half %res } @@ -203,6 +476,53 @@ define half @faddv_v8f16(half %start, <8 x half> %a) { ; CHECK-NEXT: faddv h1, p0, z1.h ; CHECK-NEXT: fadd h0, h0, h1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: faddv_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q1, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s1, s1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s1, s1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s1, s1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s1, s1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s1, s1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt 
h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s1, s1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s1, s1, s2 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call fast half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a) ret half %res } @@ -216,6 +536,94 @@ define half @faddv_v16f16(half %start, ptr %a) { ; CHECK-NEXT: faddv h1, p0, z1.h ; CHECK-NEXT: fadd h0, h0, h1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: faddv_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h4, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fadd s1, s2, s1 +; NONEON-NOSVE-NEXT: fadd s2, s4, s3 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h4, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fadd s3, s4, s3 +; NONEON-NOSVE-NEXT: ldr h4, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fadd s1, s2, s1 +; NONEON-NOSVE-NEXT: fcvt h2, s3 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s3, s4, s3 +; NONEON-NOSVE-NEXT: ldr h4, [sp, #8] +; 
NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fadd s1, s1, s2 +; NONEON-NOSVE-NEXT: fcvt h2, s3 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s3, s4, s3 +; NONEON-NOSVE-NEXT: ldr h4, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fadd s1, s1, s2 +; NONEON-NOSVE-NEXT: fcvt h2, s3 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s3, s4, s3 +; NONEON-NOSVE-NEXT: ldr h4, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fadd s1, s1, s2 +; NONEON-NOSVE-NEXT: fcvt h2, s3 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s1, s1, s2 +; NONEON-NOSVE-NEXT: fadd s2, s4, s3 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h4, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s1, s1, s2 +; NONEON-NOSVE-NEXT: fadd s2, s4, s3 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s1, s1, s2 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call fast half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op) ret half %res @@ -229,6 +637,17 @@ 
define float @faddv_v2f32(float %start, <2 x float> %a) { ; CHECK-NEXT: faddv s1, p0, z1.s ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: faddv_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8] +; NONEON-NOSVE-NEXT: fadd s1, s2, s1 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call fast float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a) ret float %res } @@ -241,6 +660,18 @@ define float @faddv_v4f32(float %start, <4 x float> %a) { ; CHECK-NEXT: faddv s1, p0, z1.s ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: faddv_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q1, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s4, s3, [sp], #16 +; NONEON-NOSVE-NEXT: fadd s3, s4, s3 +; NONEON-NOSVE-NEXT: fadd s1, s2, s1 +; NONEON-NOSVE-NEXT: fadd s1, s3, s1 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ret %res = call fast float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a) ret float %res } @@ -254,6 +685,26 @@ define float @faddv_v8f32(float %start, ptr %a) { ; CHECK-NEXT: faddv s1, p0, z1.s ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: faddv_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldp s4, s3, [sp] +; NONEON-NOSVE-NEXT: ldp s5, s6, [sp, #24] +; NONEON-NOSVE-NEXT: ldp s7, s16, [sp, #8] +; NONEON-NOSVE-NEXT: fadd s1, s3, s1 +; NONEON-NOSVE-NEXT: fadd s2, s4, s2 +; NONEON-NOSVE-NEXT: fadd s3, s7, s5 +; NONEON-NOSVE-NEXT: fadd s4, s16, s6 +; NONEON-NOSVE-NEXT: fadd s1, s2, s1 +; NONEON-NOSVE-NEXT: fadd s2, s3, s4 +; NONEON-NOSVE-NEXT: fadd s1, s1, s2 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call fast float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op) ret float %res @@ -264,6 +715,11 @@ define double @faddv_v1f64(double %start, <1 x double> %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fadd d0, d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: faddv_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fadd d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %res = call fast double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a) ret double %res } @@ -276,6 +732,15 @@ define double @faddv_v2f64(double %start, <2 x double> %a) { ; CHECK-NEXT: faddv d1, p0, z1.d ; CHECK-NEXT: fadd d0, d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: faddv_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q1, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp d2, d1, [sp], #16 +; NONEON-NOSVE-NEXT: fadd d1, d2, d1 +; NONEON-NOSVE-NEXT: fadd d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %res = call fast double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a) ret double %res } @@ -289,6 +754,19 @@ define double @faddv_v4f64(double %start, ptr %a) { ; CHECK-NEXT: faddv d1, p0, z1.d ; CHECK-NEXT: fadd d0, d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: faddv_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp d2, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d4, d3, [sp], #32 +; NONEON-NOSVE-NEXT: fadd d1, d3, d1 +; NONEON-NOSVE-NEXT: fadd d2, d4, d2 +; NONEON-NOSVE-NEXT: fadd d1, d2, d1 +; NONEON-NOSVE-NEXT: fadd d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call fast double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op) ret double %res @@ -306,6 +784,30 @@ define half @fmaxv_v4f16(<4 x half> %a) { ; CHECK-NEXT: fmaxnmv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxv_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a) ret half %res } @@ -318,6 +820,49 @@ define half @fmaxv_v8f16(<8 x half> %a) { ; CHECK-NEXT: fmaxnmv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxv_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %a) ret half %res } @@ -331,6 +876,90 @@ define half @fmaxv_v16f16(ptr %a) { ; CHECK-NEXT: fmaxnmv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxv_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h3, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: fmaxnm s1, s3, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h3, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fmaxnm s2, s3, s2 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s2, s3, s2 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s2, s3, s2 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s2, s3, s2 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: 
ldr h2, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fmaxnm s1, s3, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h3, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fmaxnm s1, s3, s2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %op) ret half %res @@ -344,6 +973,16 @@ define float @fmaxv_v2f32(<2 x float> %a) { ; CHECK-NEXT: fmaxnmv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxv_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #8] +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a) ret float %res } @@ -356,6 +995,18 @@ define float @fmaxv_v4f32(<4 x float> %a) { ; CHECK-NEXT: fmaxnmv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxv_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp] +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8] +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s2 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a) ret float %res } @@ -369,6 +1020,25 @@ define float @fmaxv_v8f32(ptr %a) { ; CHECK-NEXT: fmaxnmv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxv_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp s3, s2, [sp] +; NONEON-NOSVE-NEXT: fmaxnm s0, s2, s0 +; NONEON-NOSVE-NEXT: fmaxnm s1, s3, s1 +; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8] +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s3, s1, [sp, #24] +; NONEON-NOSVE-NEXT: fmaxnm s2, s2, s3 +; NONEON-NOSVE-NEXT: fmaxnm s1, s4, s1 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s2 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %op) ret float %res @@ -378,6 +1048,10 @@ define double @fmaxv_v1f64(<1 x double> %a) { ; CHECK-LABEL: fmaxv_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxv_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a) ret double %res } @@ -390,6 +1064,14 @@ define double @fmaxv_v2f64(<2 x double> %a) { ; CHECK-NEXT: fmaxnmv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxv_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp], #16 +; NONEON-NOSVE-NEXT: fmaxnm d0, d1, d0 +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a) ret double %res } @@ -403,6 +1085,18 @@ define double @fmaxv_v4f64(ptr %a) { ; CHECK-NEXT: fmaxnmv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxv_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d3, d2, [sp], #32 +; NONEON-NOSVE-NEXT: fmaxnm d0, d2, d0 +; NONEON-NOSVE-NEXT: fmaxnm d1, d3, d1 +; NONEON-NOSVE-NEXT: fmaxnm d0, d1, d0 +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %op) ret double %res @@ -420,6 +1114,30 @@ define half @fminv_v4f16(<4 x half> %a) { ; CHECK-NEXT: fminnmv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminv_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call half 
@llvm.vector.reduce.fmin.v4f16(<4 x half> %a) ret half %res } @@ -432,6 +1150,49 @@ define half @fminv_v8f16(<8 x half> %a) { ; CHECK-NEXT: fminnmv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminv_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %a) ret half %res } @@ -445,6 +1206,90 @@ define half @fminv_v16f16(ptr %a) { ; CHECK-NEXT: fminnmv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; 
+; NONEON-NOSVE-LABEL: fminv_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h3, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: fminnm s1, s3, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h3, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fminnm s2, s3, s2 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fminnm s2, s3, s2 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fminnm s2, s3, s2 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fminnm s2, s3, s2 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #12] +; 
NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fminnm s1, s3, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h3, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fminnm s1, s3, s2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %op) ret half %res @@ -458,6 +1303,16 @@ define float @fminv_v2f32(<2 x float> %a) { ; CHECK-NEXT: fminnmv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminv_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #8] +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a) ret float %res } @@ -470,6 +1325,18 @@ define float @fminv_v4f32(<4 x float> %a) { ; CHECK-NEXT: fminnmv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminv_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str 
q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp] +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8] +; NONEON-NOSVE-NEXT: fminnm s0, s0, s2 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a) ret float %res } @@ -483,6 +1350,25 @@ define float @fminv_v8f32(ptr %a) { ; CHECK-NEXT: fminnmv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminv_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp s3, s2, [sp] +; NONEON-NOSVE-NEXT: fminnm s0, s2, s0 +; NONEON-NOSVE-NEXT: fminnm s1, s3, s1 +; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8] +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s3, s1, [sp, #24] +; NONEON-NOSVE-NEXT: fminnm s2, s2, s3 +; NONEON-NOSVE-NEXT: fminnm s1, s4, s1 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s2 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %op) ret float %res @@ -492,6 +1378,10 @@ define double @fminv_v1f64(<1 x double> %a) { ; CHECK-LABEL: fminv_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminv_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a) ret double %res } @@ -504,6 +1394,14 @@ define double @fminv_v2f64(<2 x double> %a) { ; CHECK-NEXT: fminnmv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminv_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp], #16 +; NONEON-NOSVE-NEXT: fminnm d0, d1, d0 +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a) ret double %res } @@ -517,6 +1415,18 @@ define double @fminv_v4f64(ptr %a) { ; CHECK-NEXT: fminnmv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminv_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d3, d2, [sp], #32 +; NONEON-NOSVE-NEXT: fminnm d0, d2, d0 +; NONEON-NOSVE-NEXT: fminnm d1, d3, d1 +; NONEON-NOSVE-NEXT: fminnm d0, d1, d0 +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %op) ret double %res @@ -534,6 +1444,30 @@ define half @fmaximumv_v4f16(<4 x half> %a) { ; CHECK-NEXT: fmaxv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaximumv_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call half 
@llvm.vector.reduce.fmaximum.v4f16(<4 x half> %a) ret half %res } @@ -546,6 +1480,49 @@ define half @fmaximumv_v8f16(<8 x half> %a) { ; CHECK-NEXT: fmaxv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaximumv_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> %a) ret half %res } @@ -559,6 +1536,90 @@ define half @fmaximumv_v16f16(ptr %a) { ; CHECK-NEXT: fmaxv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret 
+; +; NONEON-NOSVE-LABEL: fmaximumv_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h3, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: fmax s1, s3, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h3, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fmax s2, s3, s2 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s2, s3, s2 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s2, s3, s2 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s2, s3, s2 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt 
s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: fmax s1, s3, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h3, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: fmax s1, s3, s2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> %op) ret half %res @@ -572,6 +1633,16 @@ define float @fmaximumv_v2f32(<2 x float> %a) { ; CHECK-NEXT: fmaxv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaximumv_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #8] +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> %a) ret float %res } @@ -584,6 +1655,18 @@ define float @fmaximumv_v4f32(<4 x float> %a) { ; CHECK-NEXT: fmaxv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaximumv_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp] +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8] +; NONEON-NOSVE-NEXT: fmax s0, s0, s2 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %a) ret float %res } @@ -597,6 +1680,25 @@ define float @fmaximumv_v8f32(ptr %a) { ; CHECK-NEXT: fmaxv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaximumv_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp s3, s2, [sp] +; NONEON-NOSVE-NEXT: fmax s0, s2, s0 +; NONEON-NOSVE-NEXT: fmax s1, s3, s1 +; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8] +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s3, s1, [sp, #24] +; NONEON-NOSVE-NEXT: fmax s2, s2, s3 +; NONEON-NOSVE-NEXT: fmax s1, s4, s1 +; NONEON-NOSVE-NEXT: fmax s0, s0, s2 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> %op) ret float %res @@ -606,6 +1708,10 @@ define double @fmaximumv_v1f64(<1 x double> %a) { ; CHECK-LABEL: fmaximumv_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaximumv_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fmaximum.v1f64(<1 x double> %a) ret double %res } @@ -618,6 +1724,14 @@ define double @fmaximumv_v2f64(<2 x double> %a) { ; CHECK-NEXT: fmaxv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaximumv_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp], #16 +; NONEON-NOSVE-NEXT: fmax d0, d1, d0 +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %a) ret double %res } @@ -631,6 +1745,18 @@ define double @fmaximumv_v4f64(ptr %a) { ; CHECK-NEXT: fmaxv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaximumv_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d3, d2, [sp], #32 +; NONEON-NOSVE-NEXT: fmax d0, d2, d0 +; NONEON-NOSVE-NEXT: fmax d1, d3, d1 +; NONEON-NOSVE-NEXT: fmax d0, d1, d0 +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %op) ret double %res @@ -648,6 +1774,30 @@ define half @fminimumv_v4f16(<4 x half> %a) { ; CHECK-NEXT: fminv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminimumv_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call half 
@llvm.vector.reduce.fminimum.v4f16(<4 x half> %a) ret half %res } @@ -660,6 +1810,49 @@ define half @fminimumv_v8f16(<8 x half> %a) { ; CHECK-NEXT: fminv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminimumv_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> %a) ret half %res } @@ -673,6 +1866,90 @@ define half @fminimumv_v16f16(ptr %a) { ; CHECK-NEXT: fminv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret 
+; +; NONEON-NOSVE-LABEL: fminimumv_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: ldr h2, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h3, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: fmin s1, s3, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h3, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fmin s2, s3, s2 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s2, s3, s2 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s2, s3, s2 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s2, s3, s2 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt 
s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: fmin s1, s3, s2 +; NONEON-NOSVE-NEXT: ldr h2, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h3, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: fmin s1, s3, s2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> %op) ret half %res @@ -686,6 +1963,16 @@ define float @fminimumv_v2f32(<2 x float> %a) { ; CHECK-NEXT: fminv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminimumv_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #8] +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> %a) ret float %res } @@ -698,6 +1985,18 @@ define float @fminimumv_v4f32(<4 x float> %a) { ; CHECK-NEXT: fminv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminimumv_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp] +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8] +; NONEON-NOSVE-NEXT: fmin s0, s0, s2 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %a) ret float %res } @@ -711,6 +2010,25 @@ define float @fminimumv_v8f32(ptr %a) { ; CHECK-NEXT: fminv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminimumv_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp s3, s2, [sp] +; NONEON-NOSVE-NEXT: fmin s0, s2, s0 +; NONEON-NOSVE-NEXT: fmin s1, s3, s1 +; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8] +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s3, s1, [sp, #24] +; NONEON-NOSVE-NEXT: fmin s2, s2, s3 +; NONEON-NOSVE-NEXT: fmin s1, s4, s1 +; NONEON-NOSVE-NEXT: fmin s0, s0, s2 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> %op) ret float %res @@ -720,6 +2038,10 @@ define double @fminimumv_v1f64(<1 x double> %a) { ; CHECK-LABEL: fminimumv_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminimumv_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fminimum.v1f64(<1 x double> %a) ret double %res } @@ -732,6 +2054,14 @@ define double @fminimumv_v2f64(<2 x double> %a) { ; CHECK-NEXT: fminv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminimumv_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp], #16 +; NONEON-NOSVE-NEXT: fmin d0, d1, d0 +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %a) ret double %res } @@ -745,6 +2075,18 @@ define double @fminimumv_v4f64(ptr %a) { ; CHECK-NEXT: fminv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminimumv_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d3, d2, [sp], #32 +; NONEON-NOSVE-NEXT: fmin d0, d2, d0 +; NONEON-NOSVE-NEXT: fmin d1, d3, d1 +; NONEON-NOSVE-NEXT: fmin d0, d1, d0 +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %op) ret double %res diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll index 412c27cb82f1d..344aac5b19838 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -16,6 +17,34 @@ define <2 x half> @frintp_v2f16(<2 x half> %op) { ; CHECK-NEXT: frintp z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.ceil.v2f16(<2 x half> %op) ret <2 x half> %res } @@ -28,6 +57,34 @@ define <4 x half> @frintp_v4f16(<4 x half> %op) { ; CHECK-NEXT: frintp z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.ceil.v4f16(<4 x half> %op) ret <4 x half> %res } @@ -40,6 +97,54 @@ define <8 x half> @frintp_v8f16(<8 x half> %op) { ; CHECK-NEXT: frintp z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.ceil.v8f16(<8 x half> %op) ret <8 x half> %res } @@ -53,6 +158,96 @@ define void @frintp_v16f16(ptr %a) { ; CHECK-NEXT: frintp z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; 
NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call <16 x half> @llvm.ceil.v16f16(<16 x half> %op) store <16 x half> %res, ptr %a @@ -67,6 +262,19 @@ define <2 x float> @frintp_v2f32(<2 x float> %op) { ; CHECK-NEXT: frintp z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frintp s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.ceil.v2f32(<2 x float> %op) ret <2 x float> %res } @@ -79,6 +287,24 @@ define <4 x float> @frintp_v4f32(<4 x float> %op) { ; CHECK-NEXT: frintp z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: frintp s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frintp s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.ceil.v4f32(<4 x float> %op) ret <4 x float> %res } @@ -92,6 +318,36 @@ define void @frintp_v8f32(ptr %a) { ; CHECK-NEXT: frintp z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: frintp s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: frintp s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: frintp s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frintp s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frintp s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call <8 x float> @llvm.ceil.v8f32(<8 x float> %op) store <8 x float> %res, ptr %a @@ -103,6 +359,16 @@ define <1 x double> @frintp_v1f64(<1 x double> %op) { ; CHECK: // %bb.0: ; CHECK-NEXT: frintp d0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: frintp d0, d0 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.ceil.v1f64(<1 x double> %op) ret <1 x double> %res } @@ -115,6 +381,19 @@ define <2 x double> @frintp_v2f64(<2 x double> %op) { ; CHECK-NEXT: frintp z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: frintp d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: frintp d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.ceil.v2f64(<2 x double> %op) ret <2 x double> %res } @@ -128,6 +407,26 @@ define void @frintp_v4f64(ptr %a) { ; CHECK-NEXT: frintp z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: frintp d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: frintp d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: frintp d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: frintp d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call <4 x double> @llvm.ceil.v4f64(<4 x double> %op) store <4 x double> %res, ptr %a @@ -146,6 +445,34 @@ define <2 x half> @frintm_v2f16(<2 x half> %op) { ; CHECK-NEXT: frintm z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.floor.v2f16(<2 x half> %op) ret <2 x half> %res } @@ -158,6 +485,34 @@ define <4 x half> @frintm_v4f16(<4 x half> %op) { ; CHECK-NEXT: frintm z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.floor.v4f16(<4 x half> %op) ret <4 x half> %res } @@ -170,6 +525,54 @@ define <8 x half> @frintm_v8f16(<8 x half> %op) { ; CHECK-NEXT: frintm z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.floor.v8f16(<8 x half> %op) ret <8 x half> %res } @@ -183,6 +586,96 @@ define void @frintm_v16f16(ptr %a) { ; CHECK-NEXT: frintm z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; 
NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call <16 x half> @llvm.floor.v16f16(<16 x half> %op) store <16 x half> %res, ptr %a @@ -197,6 +690,19 @@ define <2 x float> @frintm_v2f32(<2 x float> %op) { ; CHECK-NEXT: frintm z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frintm s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.floor.v2f32(<2 x float> %op) ret <2 x float> %res } @@ -209,6 +715,24 @@ define <4 x float> @frintm_v4f32(<4 x float> %op) { ; CHECK-NEXT: frintm z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: frintm s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frintm s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.floor.v4f32(<4 x float> %op) ret <4 x float> %res } @@ -222,6 +746,36 @@ define void @frintm_v8f32(ptr %a) { ; CHECK-NEXT: frintm z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: frintm s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: frintm s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: frintm s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frintm s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frintm s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call <8 x float> @llvm.floor.v8f32(<8 x float> %op) store <8 x float> %res, ptr %a @@ -233,6 +787,16 @@ define <1 x double> @frintm_v1f64(<1 x double> %op) { ; CHECK: // %bb.0: ; CHECK-NEXT: frintm d0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: frintm d0, d0 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.floor.v1f64(<1 x double> %op) ret <1 x double> %res } @@ -245,6 +809,19 @@ define <2 x double> @frintm_v2f64(<2 x double> %op) { ; CHECK-NEXT: frintm z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: frintm d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: frintm d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.floor.v2f64(<2 x double> %op) ret <2 x double> %res } @@ -258,6 +835,26 @@ define void @frintm_v4f64(ptr %a) { ; CHECK-NEXT: frintm z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: frintm d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: frintm d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: frintm d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: frintm d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call <4 x double> @llvm.floor.v4f64(<4 x double> %op) store <4 x double> %res, ptr %a @@ -276,6 +873,34 @@ define <2 x half> @frinti_v2f16(<2 x half> %op) { ; CHECK-NEXT: frinti z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.nearbyint.v2f16(<2 x half> %op) ret <2 x half> %res } @@ -288,6 +913,34 @@ define <4 x half> @frinti_v4f16(<4 x half> %op) { ; CHECK-NEXT: frinti z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.nearbyint.v4f16(<4 x half> %op) ret <4 x half> %res } @@ -300,6 +953,54 @@ define <8 x half> @frinti_v8f16(<8 x half> %op) { ; CHECK-NEXT: frinti z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %op) ret <8 x half> %res } @@ -313,6 +1014,96 @@ define void @frinti_v16f16(ptr %a) { ; CHECK-NEXT: frinti z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; 
NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call <16 x half> @llvm.nearbyint.v16f16(<16 x half> %op) store <16 x half> %res, ptr %a @@ -327,6 +1118,19 @@ define <2 x float> @frinti_v2f32(<2 x float> %op) { ; CHECK-NEXT: frinti z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frinti s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %op) ret <2 x float> %res } @@ -339,6 +1143,24 @@ define <4 x float> @frinti_v4f32(<4 x float> %op) { ; CHECK-NEXT: frinti z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: frinti s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frinti s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %op) ret <4 x float> %res } @@ -352,6 +1174,36 @@ define void @frinti_v8f32(ptr %a) { ; CHECK-NEXT: frinti z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: frinti s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: frinti s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: frinti s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frinti s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frinti s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %op) store <8 x float> %res, ptr %a @@ -363,6 +1215,16 @@ define <1 x double> @frinti_v1f64(<1 x double> %op) { ; CHECK: // %bb.0: ; CHECK-NEXT: frinti d0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: frinti d0, d0 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %op) ret <1 x double> %res } @@ -375,6 +1237,19 @@ define <2 x double> @frinti_v2f64(<2 x double> %op) { ; CHECK-NEXT: frinti z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: frinti d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: frinti d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %op) ret <2 x double> %res } @@ -388,6 +1263,26 @@ define void @frinti_v4f64(ptr %a) { ; CHECK-NEXT: frinti z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: frinti d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: frinti d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: frinti d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: frinti d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %op) store <4 x double> %res, ptr %a @@ -406,6 +1301,34 @@ define <2 x half> @frintx_v2f16(<2 x half> %op) { ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.rint.v2f16(<2 x half> %op) ret <2 x half> %res } @@ -418,6 +1341,34 @@ define <4 x half> @frintx_v4f16(<4 x half> %op) { ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.rint.v4f16(<4 x half> %op) ret <4 x half> %res } @@ -430,6 +1381,54 @@ define <8 x half> @frintx_v8f16(<8 x half> %op) { ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.rint.v8f16(<8 x half> %op) ret <8 x half> %res } @@ -443,6 +1442,96 @@ define void @frintx_v16f16(ptr %a) { ; CHECK-NEXT: frintx z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; 
NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call <16 x half> @llvm.rint.v16f16(<16 x half> %op) store <16 x half> %res, ptr %a @@ -457,6 +1546,19 @@ define <2 x float> @frintx_v2f32(<2 x float> %op) { ; CHECK-NEXT: frintx z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frintx s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.rint.v2f32(<2 x float> %op) ret <2 x float> %res } @@ -469,6 +1571,24 @@ define <4 x float> @frintx_v4f32(<4 x float> %op) { ; CHECK-NEXT: frintx z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: frintx s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frintx s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.rint.v4f32(<4 x float> %op) ret <4 x float> %res } @@ -482,6 +1602,36 @@ define void @frintx_v8f32(ptr %a) { ; CHECK-NEXT: frintx z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: frintx s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: frintx s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: frintx s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frintx s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frintx s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call <8 x float> @llvm.rint.v8f32(<8 x float> %op) store <8 x float> %res, ptr %a @@ -493,6 +1643,16 @@ define <1 x double> @frintx_v1f64(<1 x double> %op) { ; CHECK: // %bb.0: ; CHECK-NEXT: frintx d0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: frintx d0, d0 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.rint.v1f64(<1 x double> %op) ret <1 x double> %res } @@ -505,6 +1665,19 @@ define <2 x double> @frintx_v2f64(<2 x double> %op) { ; CHECK-NEXT: frintx z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: frintx d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: frintx d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.rint.v2f64(<2 x double> %op) ret <2 x double> %res } @@ -518,6 +1691,26 @@ define void @frintx_v4f64(ptr %a) { ; CHECK-NEXT: frintx z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: frintx d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: frintx d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: frintx d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: frintx d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call <4 x double> @llvm.rint.v4f64(<4 x double> %op) store <4 x double> %res, ptr %a @@ -536,6 +1729,34 @@ define <2 x half> @frinta_v2f16(<2 x half> %op) { ; CHECK-NEXT: frinta z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinta_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.round.v2f16(<2 x half> %op) ret <2 x half> %res } @@ -548,6 +1769,34 @@ define <4 x half> @frinta_v4f16(<4 x half> %op) { ; CHECK-NEXT: frinta z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinta_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.round.v4f16(<4 x half> %op) ret <4 x half> %res } @@ -560,6 +1809,54 @@ define <8 x half> @frinta_v8f16(<8 x half> %op) { ; CHECK-NEXT: frinta z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinta_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.round.v8f16(<8 x half> %op) ret <8 x half> %res } @@ -573,6 +1870,96 @@ define void @frinta_v16f16(ptr %a) { ; CHECK-NEXT: frinta z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinta_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; 
NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call <16 x half> @llvm.round.v16f16(<16 x half> %op) store <16 x half> %res, ptr %a @@ -587,6 +1974,19 @@ define <2 x float> @frinta_v2f32(<2 x float> %op) { ; CHECK-NEXT: frinta z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinta_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frinta s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.round.v2f32(<2 x float> %op) ret <2 x float> %res } @@ -599,6 +1999,24 @@ define <4 x float> @frinta_v4f32(<4 x float> %op) { ; CHECK-NEXT: frinta z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinta_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: frinta s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frinta s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.round.v4f32(<4 x float> %op) ret <4 x float> %res } @@ -612,6 +2030,36 @@ define void @frinta_v8f32(ptr %a) { ; CHECK-NEXT: frinta z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinta_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: frinta s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: frinta s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: frinta s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frinta s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frinta s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call <8 x float> @llvm.round.v8f32(<8 x float> %op) store <8 x float> %res, ptr %a @@ -623,6 +2071,16 @@ define <1 x double> @frinta_v1f64(<1 x double> %op) { ; CHECK: // %bb.0: ; CHECK-NEXT: frinta d0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinta_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: frinta d0, d0 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.round.v1f64(<1 x double> %op) ret <1 x double> %res } @@ -635,6 +2093,19 @@ define <2 x double> @frinta_v2f64(<2 x double> %op) { ; CHECK-NEXT: frinta z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinta_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: frinta d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: frinta d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.round.v2f64(<2 x double> %op) ret <2 x double> %res } @@ -648,6 +2119,26 @@ define void @frinta_v4f64(ptr %a) { ; CHECK-NEXT: frinta z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinta_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: frinta d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: frinta d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: frinta d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: frinta d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call <4 x double> @llvm.round.v4f64(<4 x double> %op) store <4 x double> %res, ptr %a @@ -666,6 +2157,34 @@ define <2 x half> @frintn_v2f16(<2 x half> %op) { ; CHECK-NEXT: frintn z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %op) ret <2 x half> %res } @@ -678,6 +2197,34 @@ define <4 x half> @frintn_v4f16(<4 x half> %op) { ; CHECK-NEXT: frintn z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op) ret <4 x half> %res } @@ -690,6 +2237,54 @@ define <8 x half> @frintn_v8f16(<8 x half> %op) { ; CHECK-NEXT: frintn z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op) ret <8 x half> %res } @@ -703,6 +2298,96 @@ define void @frintn_v16f16(ptr %a) { ; CHECK-NEXT: frintn z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; 
NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op) store <16 x half> %res, ptr %a @@ -717,6 +2402,19 @@ define <2 x float> @frintn_v2f32(<2 x float> %op) { ; CHECK-NEXT: frintn z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frintn s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %op) ret <2 x float> %res } @@ -729,6 +2427,24 @@ define <4 x float> @frintn_v4f32(<4 x float> %op) { ; CHECK-NEXT: frintn z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: frintn s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frintn s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %op) ret <4 x float> %res } @@ -742,6 +2458,36 @@ define void @frintn_v8f32(ptr %a) { ; CHECK-NEXT: frintn z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: frintn s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: frintn s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: frintn s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frintn s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frintn s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %op) store <8 x float> %res, ptr %a @@ -753,6 +2499,16 @@ define <1 x double> @frintn_v1f64(<1 x double> %op) { ; CHECK: // %bb.0: ; CHECK-NEXT: frintn d0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: frintn d0, d0 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %op) ret <1 x double> %res } @@ -765,6 +2521,19 @@ define <2 x double> @frintn_v2f64(<2 x double> %op) { ; CHECK-NEXT: frintn z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: frintn d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: frintn d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %op) ret <2 x double> %res } @@ -778,6 +2547,26 @@ define void @frintn_v4f64(ptr %a) { ; CHECK-NEXT: frintn z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: frintn d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: frintn d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: frintn d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: frintn d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %op) store <4 x double> %res, ptr %a @@ -796,6 +2585,34 @@ define <2 x half> @frintz_v2f16(<2 x half> %op) { ; CHECK-NEXT: frintz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.trunc.v2f16(<2 x half> %op) ret <2 x half> %res } @@ -808,6 +2625,34 @@ define <4 x half> @frintz_v4f16(<4 x half> %op) { ; CHECK-NEXT: frintz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.trunc.v4f16(<4 x half> %op) ret <4 x half> %res } @@ -820,6 +2665,54 @@ define <8 x half> @frintz_v8f16(<8 x half> %op) { ; CHECK-NEXT: frintz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.trunc.v8f16(<8 x half> %op) ret <8 x half> %res } @@ -833,6 +2726,96 @@ define void @frintz_v16f16(ptr %a) { ; CHECK-NEXT: frintz z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; 
NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call <16 x half> @llvm.trunc.v16f16(<16 x half> %op) store <16 x half> %res, ptr %a @@ -847,6 +2830,19 @@ define <2 x float> @frintz_v2f32(<2 x float> %op) { ; CHECK-NEXT: frintz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frintz s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.trunc.v2f32(<2 x float> %op) ret <2 x float> %res } @@ -859,6 +2855,24 @@ define <4 x float> @frintz_v4f32(<4 x float> %op) { ; CHECK-NEXT: frintz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: frintz s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frintz s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.trunc.v4f32(<4 x float> %op) ret <4 x float> %res } @@ -872,6 +2886,36 @@ define void @frintz_v8f32(ptr %a) { ; CHECK-NEXT: frintz z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: frintz s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: frintz s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: frintz s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: frintz s1, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: frintz s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call <8 x float> @llvm.trunc.v8f32(<8 x float> %op) store <8 x float> %res, ptr %a @@ -883,6 +2927,16 @@ define <1 x double> @frintz_v1f64(<1 x double> %op) { ; CHECK: // %bb.0: ; CHECK-NEXT: frintz d0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: frintz d0, d0 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.trunc.v1f64(<1 x double> %op) ret <1 x double> %res } @@ -895,6 +2949,19 @@ define <2 x double> @frintz_v2f64(<2 x double> %op) { ; CHECK-NEXT: frintz z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: frintz d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: frintz d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.trunc.v2f64(<2 x double> %op) ret <2 x double> %res } @@ -908,6 +2975,26 @@ define void @frintz_v4f64(ptr %a) { ; CHECK-NEXT: frintz z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: frintz d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: frintz d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: frintz d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: frintz d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call <4 x double> @llvm.trunc.v4f64(<4 x double> %op) store <4 x double> %res, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll index 89697cde848b5..daa9b51cc827b 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | 
FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -16,6 +17,32 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <2 x half> %op1, <2 x half> %op2 ret <2 x half> %sel } @@ -32,6 +59,32 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr 
h1, [sp, #12] +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <4 x half> %op1, <4 x half> %op2 ret <4 x half> %sel } @@ -48,6 +101,47 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <8 x half> %op1, <8 x half> %op2 ret <8 x half> %sel } @@ -67,6 +161,87 @@ define void @select_v16f16(ptr %a, ptr %b, i1 %mask) { ; CHECK-NEXT: sel z1.h, p0, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: tst w2, #0x1 +; 
NONEON-NOSVE-NEXT: ldr q2, [x1] +; NONEON-NOSVE-NEXT: ldr q3, [x1, #16] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: stp q1, q3, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: str q2, [sp, #48] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: str h0, [sp, #78] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: str h0, [sp, #76] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: str h0, [sp, #74] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: str h0, [sp, #72] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: str h0, [sp, #70] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: str h0, [sp, #68] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: str h0, [sp, #66] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #62] +; NONEON-NOSVE-NEXT: str h0, [sp, #64] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #60] +; NONEON-NOSVE-NEXT: str h0, [sp, #94] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #58] +; NONEON-NOSVE-NEXT: str h0, [sp, #92] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #56] +; 
NONEON-NOSVE-NEXT: str h0, [sp, #90] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #54] +; NONEON-NOSVE-NEXT: str h0, [sp, #88] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #52] +; NONEON-NOSVE-NEXT: str h0, [sp, #86] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #50] +; NONEON-NOSVE-NEXT: str h0, [sp, #84] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #48] +; NONEON-NOSVE-NEXT: str h0, [sp, #82] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: str h0, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load volatile <16 x half>, ptr %a %op2 = load volatile <16 x half>, ptr %b %sel = select i1 %mask, <16 x half> %op1, <16 x half> %op2 @@ -86,6 +261,22 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fcsel s3, s2, s0, ne +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <2 x float> %op1, <2 x float> %op2 ret <2 x float> %sel } @@ -102,6 +293,27 @@ define <4 
x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fcsel s3, s2, s0, ne +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fcsel s3, s2, s0, ne +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <4 x float> %op1, <4 x float> %op2 ret <4 x float> %sel } @@ -121,6 +333,47 @@ define void @select_v8f32(ptr %a, ptr %b, i1 %mask) { ; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: tst w2, #0x1 +; NONEON-NOSVE-NEXT: ldr q2, [x1] +; NONEON-NOSVE-NEXT: ldr q3, [x1, #16] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: stp q1, q3, [sp, #16] +; NONEON-NOSVE-NEXT: str q2, [sp, #48] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fcsel s3, s0, s2, ne +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #32] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #72] +; NONEON-NOSVE-NEXT: 
ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fcsel s3, s0, s2, ne +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #56] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #64] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: fcsel s3, s0, s2, ne +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #48] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #88] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #4] +; NONEON-NOSVE-NEXT: fcsel s3, s0, s2, ne +; NONEON-NOSVE-NEXT: ldr s0, [sp] +; NONEON-NOSVE-NEXT: fcsel s0, s0, s1, ne +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load volatile <8 x float>, ptr %a %op2 = load volatile <8 x float>, ptr %b %sel = select i1 %mask, <8 x float> %op1, <8 x float> %op2 @@ -134,6 +387,17 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, i1 %mask ; CHECK-NEXT: tst w0, #0x1 ; CHECK-NEXT: fcsel d0, d0, d1, ne ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: fcsel d0, d0, d1, ne +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <1 x double> %op1, <1 x double> %op2 ret <1 x double> %sel } @@ -151,6 +415,21 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp] +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fcsel d3, d2, d0, ne +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <2 x double> %op1, <2 x double> %op2 ret <2 x double> %sel } @@ -171,6 +450,35 @@ define void @select_v4f64(ptr %a, ptr %b, i1 %mask) { ; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: tst w2, #0x1 +; NONEON-NOSVE-NEXT: ldr q2, [x1] +; NONEON-NOSVE-NEXT: ldr q3, [x1, #16] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: stp q1, q3, [sp, #16] +; NONEON-NOSVE-NEXT: str q2, [sp, #48] +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fcsel d3, d0, d2, ne +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fcsel d0, d0, d1, ne +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #48] +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: fcsel d3, d0, d2, ne +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: fcsel d0, d0, d1, ne +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load volatile <4 x double>, ptr %a %op2 = load volatile <4 x double>, ptr %b %sel = select i1 %mask, <4 x double> %op1, <4 x double> %op2 diff --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll index 5840ffb20994c..0d92a6fa0fa28 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -15,6 +16,30 @@ define <4 x i16> @fcvtzu_v4f16_v4i16(<4 x half> %op1) { ; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v4f16_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #14] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #12] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #10] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fptoui <4 x half> %op1 to <4 x i16> ret <4 x i16> %res } @@ -27,6 +52,48 @@ define void @fcvtzu_v8f16_v8i16(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v8f16_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, 
[x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #22] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #20] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #18] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %res = fptoui <8 x half> %op1 to <8 x i16> store <8 x i16> %res, ptr %b @@ -42,6 +109,80 @@ define void @fcvtzu_v16f16_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzu z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v16f16_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, 
#4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fptoui <16 x half> %op1 to <16 x i16> store <16 x i16> %res, ptr %b @@ -61,6 +202,21 @@ define <2 x i32> @fcvtzu_v2f16_v2i32(<2 x half> %op1) { ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v2f16_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fptoui <2 x half> %op1 to <2 x i32> ret <2 x i32> %res } @@ -74,6 +230,29 @@ define <4 x i32> @fcvtzu_v4f16_v4i32(<4 x half> %op1) { ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v4f16_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: fcvtzu w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fptoui <4 x half> %op1 to <4 x i32> ret <4 x i32> %res } @@ -90,6 +269,46 @@ define void @fcvtzu_v8f16_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v8f16_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: fcvtzu w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: fcvtzu w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: fcvtzu w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %res = fptoui <8 x half> %op1 to <8 x i32> store <8 x i32> %res, ptr %b @@ -114,6 +333,78 @@ define void @fcvtzu_v16f16_v16i32(ptr %a, ptr %b) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v16f16_v16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-128]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: fcvtzu w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] +; NONEON-NOSVE-NEXT: fcvtzu w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] +; NONEON-NOSVE-NEXT: fcvtzu w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] +; NONEON-NOSVE-NEXT: fcvtzu w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: fcvtzu w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] +; NONEON-NOSVE-NEXT: fcvtzu w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; 
NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] +; NONEON-NOSVE-NEXT: fcvtzu w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fptoui <16 x half> %op1 to <16 x i32> store <16 x i32> %res, ptr %b @@ -130,6 +421,17 @@ define <1 x i64> @fcvtzu_v1f16_v1i64(<1 x half> %op1) { ; CHECK-NEXT: fcvtzu x8, h0 ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v1f16_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fptoui <1 x half> %op1 to <1 x i64> ret <1 x i64> %res } @@ -145,6 +447,22 @@ define <2 x i64> @fcvtzu_v2f16_v2i64(<2 x half> %op1) { ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr q0, [sp], #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v2f16_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fptoui <2 
x half> %op1 to <2 x i64> ret <2 x i64> %res } @@ -167,6 +485,31 @@ define void @fcvtzu_v4f16_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v4f16_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: fcvtzu x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x half>, ptr %a %res = fptoui <4 x half> %op1 to <4 x i64> store <4 x i64> %res, ptr %b @@ -204,6 +547,48 @@ define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q0, [x1, #32] ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v8f16_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-96]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64] +; NONEON-NOSVE-NEXT: fcvtzu x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q2, q3, [sp, #64] +; NONEON-NOSVE-NEXT: fcvtzu x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: fcvtzu x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %res = fptoui <8 x half> %op1 to <8 x i64> store <8 x i64> %res, ptr %b @@ -264,6 +649,83 @@ define void @fcvtzu_v16f16_v16i64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q5, q2, [x1, #96] ; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v16f16_v16i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #192 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 192 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, 
#32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #96] +; NONEON-NOSVE-NEXT: fcvtzu x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #112] +; NONEON-NOSVE-NEXT: fcvtzu x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64] +; NONEON-NOSVE-NEXT: fcvtzu x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #64] +; NONEON-NOSVE-NEXT: fcvtzu x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #160] +; NONEON-NOSVE-NEXT: fcvtzu x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #176] +; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #160] +; NONEON-NOSVE-NEXT: fcvtzu x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; 
NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #128] +; NONEON-NOSVE-NEXT: fcvtzu x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #144] +; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #128] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32] +; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64] +; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #192 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fptoui <16 x half> %op1 to <16 x i64> store <16 x i64> %res, ptr %b @@ -282,6 +744,18 @@ define <2 x i16> @fcvtzu_v2f32_v2i16(<2 x float> %op1) { ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v2f32_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp] +; NONEON-NOSVE-NEXT: fcvtzu w9, s1 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fptoui <2 x float> %op1 to <2 x i16> ret <2 x i16> %res } @@ -295,6 +769,24 @@ define <4 x i16> @fcvtzu_v4f32_v4i16(<4 x float> %op1) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v4f32_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fptoui <4 x float> %op1 to <4 x i16> ret <4 x i16> %res } @@ -312,6 +804,35 @@ define <8 x i16> @fcvtzu_v8f32_v8i16(ptr %a) { ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v8f32_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %res = fptoui <8 x float> %op1 to <8 x i16> ret <8 x i16> %res @@ -336,6 +857,60 @@ 
define void @fcvtzu_v16f32_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h ; CHECK-NEXT: stp q2, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v16f32_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0, #32] +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: str q2, [sp, #48] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, 
#82] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x float>, ptr %a %res = fptoui <16 x float> %op1 to <16 x i16> store <16 x i16> %res, ptr %b @@ -354,6 +929,18 @@ define <2 x i32> @fcvtzu_v2f32_v2i32(<2 x float> %op1) { ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v2f32_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp] +; NONEON-NOSVE-NEXT: fcvtzu w9, s1 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fptoui <2 x float> %op1 to <2 x i32> ret <2 x i32> %res } @@ -366,6 +953,22 @@ define <4 x i32> @fcvtzu_v4f32_v4i32(<4 x float> %op1) { ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v4f32_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtzu w9, s1 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: fcvtzu w9, s1 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fptoui <4 x float> %op1 to <4 x i32> ret <4 x i32> %res } @@ -379,6 +982,32 @@ define void @fcvtzu_v8f32_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzu z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v8f32_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: fcvtzu w9, s1 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: fcvtzu w9, s1 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: fcvtzu w9, s1 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: fcvtzu w9, s1 +; NONEON-NOSVE-NEXT: fcvtzu w8, s0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %res = fptoui <8 x float> %op1 to <8 x i32> store <8 x i32> %res, ptr %b @@ -398,6 +1027,17 @@ define <1 x i64> @fcvtzu_v1f32_v1i64(<1 x float> %op1) { ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
fcvtzu_v1f32_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fptoui <1 x float> %op1 to <1 x i64> ret <1 x i64> %res } @@ -411,6 +1051,19 @@ define <2 x i64> @fcvtzu_v2f32_v2i64(<2 x float> %op1) { ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v2f32_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtzu x9, s1 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fptoui <2 x float> %op1 to <2 x i64> ret <2 x i64> %res } @@ -427,6 +1080,26 @@ define void @fcvtzu_v4f32_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v4f32_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: fcvtzu x9, s1 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: fcvtzu x9, s1 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x float>, ptr %a %res = fptoui <4 x float> %op1 to <4 x i64> store <4 x i64> %res, ptr %b @@ -451,6 +1124,38 @@ define void @fcvtzu_v8f32_v8i64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v8f32_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-128]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvtzu x9, s1 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #80] +; NONEON-NOSVE-NEXT: fcvtzu x9, s1 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64] +; NONEON-NOSVE-NEXT: fcvtzu x9, s1 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #112] +; NONEON-NOSVE-NEXT: fcvtzu x9, s1 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %res = fptoui <8 x float> %op1 to <8 x i64> store <8 x i64> %res, ptr %b @@ -468,6 +1173,16 @@ define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) { ; CHECK-NEXT: mov z0.h, w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v1f64_v1i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fptoui <1 x double> %op1 to <1 x i16> ret <1 x i16> %res } @@ -481,6 +1196,18 @@ define <2 x i16> @fcvtzu_v2f64_v2i16(<2 x double> %op1) { ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
fcvtzu_v2f64_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fptoui <2 x double> %op1 to <2 x i16> ret <2 x i16> %res } @@ -509,6 +1236,31 @@ define <4 x i16> @fcvtzu_v4f64_v4i16(ptr %a) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v4f64_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-80]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #40] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #56] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: strh w9, [sp, #78] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: strh w9, [sp, #74] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #72] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %res = fptoui <4 x double> %op1 to <4 x i16> ret <4 x i16> %res @@ -552,6 +1304,53 @@ define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) { ; CHECK-NEXT: strh w8, [sp, #2] ; CHECK-NEXT: ldr q0, [sp], #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v8f64_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #144 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 144 +; NONEON-NOSVE-NEXT: 
ldp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: str q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: str q2, [sp] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d2, [sp, #80] +; NONEON-NOSVE-NEXT: ldr d1, [sp, #72] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #104] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #104] +; NONEON-NOSVE-NEXT: str d2, [sp, #120] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #64] +; NONEON-NOSVE-NEXT: strh w9, [sp, #142] +; NONEON-NOSVE-NEXT: strh w8, [sp, #140] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: str d0, [sp, #96] +; NONEON-NOSVE-NEXT: strh w9, [sp, #138] +; NONEON-NOSVE-NEXT: strh w8, [sp, #136] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #112] +; NONEON-NOSVE-NEXT: strh w9, [sp, #134] +; NONEON-NOSVE-NEXT: strh w8, [sp, #132] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #96] +; NONEON-NOSVE-NEXT: strh w9, [sp, #130] +; NONEON-NOSVE-NEXT: strh w8, [sp, #128] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #128] +; NONEON-NOSVE-NEXT: add sp, sp, #144 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x double>, ptr %a %res = fptoui <8 x double> %op1 to <8 x i16> ret <8 x i16> %res @@ -628,6 +1427,94 @@ define void @fcvtzu_v16f64_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v16f64_v16i16: +; 
NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #304 +; NONEON-NOSVE-NEXT: str x29, [sp, #288] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 304 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32] +; NONEON-NOSVE-NEXT: ldr x29, [sp, #288] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q6, q7, [x0] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0, #64] +; NONEON-NOSVE-NEXT: ldp q4, q5, [x0, #96] +; NONEON-NOSVE-NEXT: stp q1, q7, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q2, [sp, #96] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q6, q4, [sp] +; NONEON-NOSVE-NEXT: stp q5, q3, [sp, #32] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #168] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #184] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #176] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #112] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d2, [sp, #176] +; NONEON-NOSVE-NEXT: ldr d1, [sp, #168] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #232] +; 
NONEON-NOSVE-NEXT: ldr d1, [sp, #136] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #192] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #232] +; NONEON-NOSVE-NEXT: str d2, [sp, #248] +; NONEON-NOSVE-NEXT: ldp d0, d2, [sp, #144] +; NONEON-NOSVE-NEXT: strh w9, [sp, #270] +; NONEON-NOSVE-NEXT: strh w8, [sp, #268] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #248] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #200] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #160] +; NONEON-NOSVE-NEXT: strh w9, [sp, #266] +; NONEON-NOSVE-NEXT: strh w8, [sp, #264] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #240] +; NONEON-NOSVE-NEXT: stp d2, d0, [sp, #216] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #192] +; NONEON-NOSVE-NEXT: strh w9, [sp, #262] +; NONEON-NOSVE-NEXT: strh w8, [sp, #260] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #200] +; NONEON-NOSVE-NEXT: str d0, [sp, #296] +; NONEON-NOSVE-NEXT: strh w9, [sp, #258] +; NONEON-NOSVE-NEXT: strh w8, [sp, #256] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #216] +; NONEON-NOSVE-NEXT: strh w9, [sp, #286] +; NONEON-NOSVE-NEXT: strh w8, [sp, #284] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #208] +; NONEON-NOSVE-NEXT: strh w9, [sp, #282] +; NONEON-NOSVE-NEXT: strh w8, [sp, #280] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #224] +; NONEON-NOSVE-NEXT: strh w8, [sp, #276] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #300] +; NONEON-NOSVE-NEXT: strh w9, [sp, #278] +; NONEON-NOSVE-NEXT: strh w8, [sp, #274] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #296] +; NONEON-NOSVE-NEXT: strh w8, [sp, #272] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #256] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #304 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x double>, ptr %a %res = fptoui <16 x double> %op1 to <16 x i16> store <16 x i16> %res, ptr %b @@ -647,6 +1534,16 @@ define <1 x i32> @fcvtzu_v1f64_v1i32(<1 x double> %op1) { ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v1f64_v1i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub 
sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: str w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fptoui <1 x double> %op1 to <1 x i32> ret <1 x i32> %res } @@ -660,6 +1557,18 @@ define <2 x i32> @fcvtzu_v2f64_v2i32(<2 x double> %op1) { ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v2f64_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fptoui <2 x double> %op1 to <2 x i32> ret <2 x i32> %res } @@ -677,6 +1586,23 @@ define <4 x i32> @fcvtzu_v4f64_v4i32(ptr %a) { ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v4f64_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %res = fptoui <4 x double> %op1 to <4 x i32> ret <4 x i32> %res @@ -701,6 +1627,36 @@ define void @fcvtzu_v8f64_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s ; CHECK-NEXT: stp q2, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v8f64_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0, #32] +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: str q2, [sp, #48] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: fcvtzu w9, d1 +; NONEON-NOSVE-NEXT: fcvtzu w8, d0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x double>, ptr %a %res = fptoui <8 x double> %op1 to <8 x i32> store <8 x i32> %res, ptr %b @@ -719,6 +1675,16 @@ define 
<1 x i64> @fcvtzu_v1f64_v1i64(<1 x double> %op1) { ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v1f64_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fcvtzu x8, d0 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fptoui <1 x double> %op1 to <1 x i64> ret <1 x i64> %res } @@ -731,6 +1697,18 @@ define <2 x i64> @fcvtzu_v2f64_v2i64(<2 x double> %op1) { ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v2f64_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: fcvtzu x9, d1 +; NONEON-NOSVE-NEXT: fcvtzu x8, d0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fptoui <2 x double> %op1 to <2 x i64> ret <2 x i64> %res } @@ -744,6 +1722,24 @@ define void @fcvtzu_v4f64_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v4f64_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: fcvtzu x9, d1 +; NONEON-NOSVE-NEXT: fcvtzu x8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: fcvtzu x9, d1 +; NONEON-NOSVE-NEXT: fcvtzu x8, d0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %res = fptoui <4 x double> %op1 to <4 x i64> store <4 x i64> %res, ptr %b @@ -762,6 +1758,30 @@ define <4 x i16> @fcvtzs_v4f16_v4i16(<4 x half> %op1) { ; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v4f16_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #14] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #12] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #10] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fptosi <4 x half> %op1 to <4 x i16> ret <4 x i16> %res } @@ -774,6 +1794,48 @@ define void @fcvtzs_v8f16_v8i16(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v8f16_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; 
NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #22] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #20] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #18] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %res = fptosi <8 x half> %op1 to <8 x i16> store <8 x i16> %res, ptr %b @@ -789,6 +1851,80 @@ define void @fcvtzs_v16f16_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzs z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v16f16_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, 
#4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fptosi <16 x half> %op1 to <16 x i16> store <16 x i16> %res, ptr %b @@ -808,6 +1944,21 @@ define <2 x i32> @fcvtzs_v2f16_v2i32(<2 x half> %op1) { ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v2f16_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fptosi <2 x half> %op1 to <2 x i32> ret <2 x i32> %res } @@ -821,6 +1972,29 @@ define <4 x i32> @fcvtzs_v4f16_v4i32(<4 x half> %op1) { ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v4f16_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: fcvtzs w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fptosi <4 x half> %op1 to <4 x i32> ret <4 x i32> %res } @@ -837,6 +2011,46 @@ define void @fcvtzs_v8f16_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v8f16_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: fcvtzs w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: fcvtzs w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: fcvtzs w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %res = fptosi <8 x half> %op1 to <8 x i32> store <8 x i32> %res, ptr %b @@ -861,6 +2075,78 @@ define void @fcvtzs_v16f16_v16i32(ptr %a, ptr %b) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v16f16_v16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-128]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: fcvtzs w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] +; NONEON-NOSVE-NEXT: fcvtzs w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] +; NONEON-NOSVE-NEXT: fcvtzs w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] +; NONEON-NOSVE-NEXT: fcvtzs w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: fcvtzs w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] +; NONEON-NOSVE-NEXT: fcvtzs w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; 
NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] +; NONEON-NOSVE-NEXT: fcvtzs w9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fptosi <16 x half> %op1 to <16 x i32> store <16 x i32> %res, ptr %b @@ -877,6 +2163,17 @@ define <1 x i64> @fcvtzs_v1f16_v1i64(<1 x half> %op1) { ; CHECK-NEXT: fcvtzs x8, h0 ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v1f16_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fptosi <1 x half> %op1 to <1 x i64> ret <1 x i64> %res } @@ -893,6 +2190,22 @@ define <2 x i64> @fcvtzs_v2f16_v2i64(<2 x half> %op1) { ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr q0, [sp], #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v2f16_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fptosi 
<2 x half> %op1 to <2 x i64> ret <2 x i64> %res } @@ -915,6 +2228,31 @@ define void @fcvtzs_v4f16_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v4f16_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: fcvtzs x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x half>, ptr %a %res = fptosi <4 x half> %op1 to <4 x i64> store <4 x i64> %res, ptr %b @@ -952,6 +2290,48 @@ define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q0, [x1, #32] ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v8f16_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-96]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64] +; NONEON-NOSVE-NEXT: fcvtzs x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q2, q3, [sp, #64] +; NONEON-NOSVE-NEXT: fcvtzs x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: fcvtzs x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %res = fptosi <8 x half> %op1 to <8 x i64> store <8 x i64> %res, ptr %b @@ -1012,6 +2392,83 @@ define void @fcvtzs_v16f16_v16i64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q5, q2, [x1, #96] ; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v16f16_v16i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #192 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 192 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, 
#32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #96] +; NONEON-NOSVE-NEXT: fcvtzs x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #112] +; NONEON-NOSVE-NEXT: fcvtzs x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64] +; NONEON-NOSVE-NEXT: fcvtzs x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #64] +; NONEON-NOSVE-NEXT: fcvtzs x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #160] +; NONEON-NOSVE-NEXT: fcvtzs x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #176] +; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #160] +; NONEON-NOSVE-NEXT: fcvtzs x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; 
NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #128] +; NONEON-NOSVE-NEXT: fcvtzs x9, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #144] +; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #128] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32] +; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64] +; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #192 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fptosi <16 x half> %op1 to <16 x i64> store <16 x i64> %res, ptr %b @@ -1030,6 +2487,18 @@ define <2 x i16> @fcvtzs_v2f32_v2i16(<2 x float> %op1) { ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v2f32_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp] +; NONEON-NOSVE-NEXT: fcvtzs w9, s1 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fptosi <2 x float> %op1 to <2 x i16> ret <2 x i16> %res } @@ -1043,6 +2512,24 @@ define <4 x i16> @fcvtzs_v4f32_v4i16(<4 x float> %op1) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v4f32_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fptosi <4 x float> %op1 to <4 x i16> ret <4 x i16> %res } @@ -1060,6 +2547,35 @@ define <8 x i16> @fcvtzs_v8f32_v8i16(ptr %a) { ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v8f32_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %res = fptosi <8 x float> %op1 to <8 x i16> ret <8 x i16> %res @@ -1084,6 +2600,60 @@ 
define void @fcvtzs_v16f32_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h ; CHECK-NEXT: stp q2, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v16f32_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0, #32] +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: str q2, [sp, #48] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: fcvtzs w8, s1 +; NONEON-NOSVE-NEXT: strh w8, [sp, 
#82] +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x float>, ptr %a %res = fptosi <16 x float> %op1 to <16 x i16> store <16 x i16> %res, ptr %b @@ -1102,6 +2672,18 @@ define <2 x i32> @fcvtzs_v2f32_v2i32(<2 x float> %op1) { ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v2f32_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp] +; NONEON-NOSVE-NEXT: fcvtzs w9, s1 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fptosi <2 x float> %op1 to <2 x i32> ret <2 x i32> %res } @@ -1114,6 +2696,22 @@ define <4 x i32> @fcvtzs_v4f32_v4i32(<4 x float> %op1) { ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v4f32_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtzs w9, s1 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: fcvtzs w9, s1 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fptosi <4 x float> %op1 to <4 x i32> ret <4 x i32> %res } @@ -1127,6 +2725,32 @@ define void @fcvtzs_v8f32_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzs z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v8f32_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: fcvtzs w9, s1 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: fcvtzs w9, s1 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: fcvtzs w9, s1 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: fcvtzs w9, s1 +; NONEON-NOSVE-NEXT: fcvtzs w8, s0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %res = fptosi <8 x float> %op1 to <8 x i32> store <8 x i32> %res, ptr %b @@ -1146,6 +2770,17 @@ define <1 x i64> @fcvtzs_v1f32_v1i64(<1 x float> %op1) { ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
fcvtzs_v1f32_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fptosi <1 x float> %op1 to <1 x i64> ret <1 x i64> %res } @@ -1159,6 +2794,19 @@ define <2 x i64> @fcvtzs_v2f32_v2i64(<2 x float> %op1) { ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v2f32_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtzs x9, s1 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fptosi <2 x float> %op1 to <2 x i64> ret <2 x i64> %res } @@ -1175,6 +2823,26 @@ define void @fcvtzs_v4f32_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v4f32_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: fcvtzs x9, s1 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: fcvtzs x9, s1 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x float>, ptr %a %res = fptosi <4 x float> %op1 to <4 x i64> store <4 x i64> %res, ptr %b @@ -1199,6 +2867,38 @@ define void @fcvtzs_v8f32_v8i64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v8f32_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-128]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvtzs x9, s1 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #80] +; NONEON-NOSVE-NEXT: fcvtzs x9, s1 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64] +; NONEON-NOSVE-NEXT: fcvtzs x9, s1 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #112] +; NONEON-NOSVE-NEXT: fcvtzs x9, s1 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %res = fptosi <8 x float> %op1 to <8 x i64> store <8 x i64> %res, ptr %b @@ -1218,6 +2918,16 @@ define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) { ; CHECK-NEXT: mov z0.h, w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v1f64_v1i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fptosi <1 x double> %op1 to <1 x i16> ret <1 x i16> %res } @@ -1231,6 +2941,18 @@ define <2 x i16> @fcvtzs_v2f64_v2i16(<2 x double> %op1) { ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
fcvtzs_v2f64_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fptosi <2 x double> %op1 to <2 x i16> ret <2 x i16> %res } @@ -1259,6 +2981,31 @@ define <4 x i16> @fcvtzs_v4f64_v4i16(ptr %a) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v4f64_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-80]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #40] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #56] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: strh w9, [sp, #78] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: strh w9, [sp, #74] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #72] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %res = fptosi <4 x double> %op1 to <4 x i16> ret <4 x i16> %res @@ -1302,6 +3049,53 @@ define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) { ; CHECK-NEXT: strh w8, [sp, #2] ; CHECK-NEXT: ldr q0, [sp], #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v8f64_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #144 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 144 +; NONEON-NOSVE-NEXT: 
ldp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: str q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: str q2, [sp] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d2, [sp, #80] +; NONEON-NOSVE-NEXT: ldr d1, [sp, #72] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #104] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #104] +; NONEON-NOSVE-NEXT: str d2, [sp, #120] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #64] +; NONEON-NOSVE-NEXT: strh w9, [sp, #142] +; NONEON-NOSVE-NEXT: strh w8, [sp, #140] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: str d0, [sp, #96] +; NONEON-NOSVE-NEXT: strh w9, [sp, #138] +; NONEON-NOSVE-NEXT: strh w8, [sp, #136] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #112] +; NONEON-NOSVE-NEXT: strh w9, [sp, #134] +; NONEON-NOSVE-NEXT: strh w8, [sp, #132] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #96] +; NONEON-NOSVE-NEXT: strh w9, [sp, #130] +; NONEON-NOSVE-NEXT: strh w8, [sp, #128] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #128] +; NONEON-NOSVE-NEXT: add sp, sp, #144 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x double>, ptr %a %res = fptosi <8 x double> %op1 to <8 x i16> ret <8 x i16> %res @@ -1378,6 +3172,94 @@ define void @fcvtzs_v16f64_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v16f64_v16i16: +; 
NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #304 +; NONEON-NOSVE-NEXT: str x29, [sp, #288] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 304 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32] +; NONEON-NOSVE-NEXT: ldr x29, [sp, #288] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q6, q7, [x0] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0, #64] +; NONEON-NOSVE-NEXT: ldp q4, q5, [x0, #96] +; NONEON-NOSVE-NEXT: stp q1, q7, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q2, [sp, #96] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q6, q4, [sp] +; NONEON-NOSVE-NEXT: stp q5, q3, [sp, #32] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #168] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #184] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #176] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #112] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d2, [sp, #176] +; NONEON-NOSVE-NEXT: ldr d1, [sp, #168] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #232] +; 
NONEON-NOSVE-NEXT: ldr d1, [sp, #136] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #192] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #232] +; NONEON-NOSVE-NEXT: str d2, [sp, #248] +; NONEON-NOSVE-NEXT: ldp d0, d2, [sp, #144] +; NONEON-NOSVE-NEXT: strh w9, [sp, #270] +; NONEON-NOSVE-NEXT: strh w8, [sp, #268] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #248] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #200] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #160] +; NONEON-NOSVE-NEXT: strh w9, [sp, #266] +; NONEON-NOSVE-NEXT: strh w8, [sp, #264] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #240] +; NONEON-NOSVE-NEXT: stp d2, d0, [sp, #216] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #192] +; NONEON-NOSVE-NEXT: strh w9, [sp, #262] +; NONEON-NOSVE-NEXT: strh w8, [sp, #260] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #200] +; NONEON-NOSVE-NEXT: str d0, [sp, #296] +; NONEON-NOSVE-NEXT: strh w9, [sp, #258] +; NONEON-NOSVE-NEXT: strh w8, [sp, #256] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #216] +; NONEON-NOSVE-NEXT: strh w9, [sp, #286] +; NONEON-NOSVE-NEXT: strh w8, [sp, #284] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #208] +; NONEON-NOSVE-NEXT: strh w9, [sp, #282] +; NONEON-NOSVE-NEXT: strh w8, [sp, #280] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #224] +; NONEON-NOSVE-NEXT: strh w8, [sp, #276] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #300] +; NONEON-NOSVE-NEXT: strh w9, [sp, #278] +; NONEON-NOSVE-NEXT: strh w8, [sp, #274] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #296] +; NONEON-NOSVE-NEXT: strh w8, [sp, #272] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #256] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #304 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x double>, ptr %a %res = fptosi <16 x double> %op1 to <16 x i16> store <16 x i16> %res, ptr %b @@ -1397,6 +3279,16 @@ define <1 x i32> @fcvtzs_v1f64_v1i32(<1 x double> %op1) { ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v1f64_v1i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub 
sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: str w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fptosi <1 x double> %op1 to <1 x i32> ret <1 x i32> %res } @@ -1410,6 +3302,18 @@ define <2 x i32> @fcvtzs_v2f64_v2i32(<2 x double> %op1) { ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v2f64_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fptosi <2 x double> %op1 to <2 x i32> ret <2 x i32> %res } @@ -1427,6 +3331,23 @@ define <4 x i32> @fcvtzs_v4f64_v4i32(ptr %a) { ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v4f64_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %res = fptosi <4 x double> %op1 to <4 x i32> ret <4 x i32> %res @@ -1451,6 +3372,36 @@ define void @fcvtzs_v8f64_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s ; CHECK-NEXT: stp q2, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v8f64_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0, #32] +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: str q2, [sp, #48] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: fcvtzs w9, d1 +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x double>, ptr %a %res = fptosi <8 x double> %op1 to <8 x i32> store <8 x i32> %res, ptr %b @@ -1469,6 +3420,16 @@ 
define <1 x i64> @fcvtzs_v1f64_v1i64(<1 x double> %op1) { ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v1f64_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fcvtzs x8, d0 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fptosi <1 x double> %op1 to <1 x i64> ret <1 x i64> %res } @@ -1481,6 +3442,18 @@ define <2 x i64> @fcvtzs_v2f64_v2i64(<2 x double> %op1) { ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v2f64_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: fcvtzs x9, d1 +; NONEON-NOSVE-NEXT: fcvtzs x8, d0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fptosi <2 x double> %op1 to <2 x i64> ret <2 x i64> %res } @@ -1494,6 +3467,24 @@ define void @fcvtzs_v4f64_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v4f64_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: fcvtzs x9, d1 +; NONEON-NOSVE-NEXT: fcvtzs x8, d0 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: fcvtzs x9, d1 +; NONEON-NOSVE-NEXT: fcvtzs x8, d0 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %res = fptosi <4 x double> %op1 to <4 x i64> store <4 x i64> %res, ptr %b diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll index c1c7b5c05f5d5..69661049bcb6f 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -27,6 +28,31 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x i1> %mask ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: str d0, [sp] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: 
sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: str w10, [sp, #28] +; NONEON-NOSVE-NEXT: tst w9, #0xffff +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: tst w8, #0xffff +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %sel = select <2 x i1> %mask, <2 x half> %op1, <2 x half> %op2 ret <2 x half> %sel } @@ -45,6 +71,44 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #22] +; NONEON-NOSVE-NEXT: str d0, [sp] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #18] +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: tst w9, #0xffff +; NONEON-NOSVE-NEXT: sbfx w9, w11, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: tst w9, #0xffff +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: tst w9, #0xffff +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: tst w8, #0xffff +; NONEON-NOSVE-NEXT: str 
h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %sel = select <4 x i1> %mask, <4 x half> %op1, <4 x half> %op2 ret <4 x half> %sel } @@ -64,6 +128,72 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: str d2, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #47] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #45] +; NONEON-NOSVE-NEXT: sbfx w13, w13, #0, #1 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #40] +; NONEON-NOSVE-NEXT: tst w13, #0xffff +; NONEON-NOSVE-NEXT: sbfx w13, w15, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w12, w12, #0, #1 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: sbfx w11, w11, #0, #1 +; NONEON-NOSVE-NEXT: tst w13, #0xffff +; NONEON-NOSVE-NEXT: sbfx w13, w14, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: str h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: tst w13, #0xffff +; NONEON-NOSVE-NEXT: str h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcsel 
s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: tst w12, #0xffff +; NONEON-NOSVE-NEXT: str h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: tst w11, #0xffff +; NONEON-NOSVE-NEXT: str h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: tst w10, #0xffff +; NONEON-NOSVE-NEXT: str h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: tst w9, #0xffff +; NONEON-NOSVE-NEXT: str h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: tst w8, #0xffff +; NONEON-NOSVE-NEXT: str h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: str h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %sel = select <8 x i1> %mask, <8 x half> %op1, <8 x half> %op2 ret <8 x half> %sel } @@ -80,6 +210,130 @@ define void @select_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: sel z1.h, p0, z2.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h4, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h5, [sp, #4] +; NONEON-NOSVE-NEXT: ldr h16, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s2, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h1 +; NONEON-NOSVE-NEXT: ldr h17, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s6, h4 +; NONEON-NOSVE-NEXT: fcvt s7, h5 +; NONEON-NOSVE-NEXT: ldr h19, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s18, h17 +; NONEON-NOSVE-NEXT: ldr h21, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h22, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s20, h19 +; NONEON-NOSVE-NEXT: ldr h24, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h25, [sp, #34] +; NONEON-NOSVE-NEXT: fcmp s3, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h16 +; NONEON-NOSVE-NEXT: ldr h3, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h26, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h27, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h28, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h29, [sp, #44] +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, eq +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: fcvt s7, h3 +; NONEON-NOSVE-NEXT: ldr h6, [sp, #26] +; NONEON-NOSVE-NEXT: fcsel s1, s5, s4, eq +; NONEON-NOSVE-NEXT: fcmp s18, s2 +; NONEON-NOSVE-NEXT: fcvt s4, h6 +; NONEON-NOSVE-NEXT: fcvt s18, h21 +; NONEON-NOSVE-NEXT: ldr h5, [sp, #28] +; NONEON-NOSVE-NEXT: str h0, [sp, #66] +; NONEON-NOSVE-NEXT: fcsel s2, s17, s16, eq +; NONEON-NOSVE-NEXT: fcmp s20, s7 +; NONEON-NOSVE-NEXT: fcvt s16, h5 +; NONEON-NOSVE-NEXT: fcvt s17, h22 +; NONEON-NOSVE-NEXT: ldr h7, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h20, [sp, #14] +; NONEON-NOSVE-NEXT: str h1, [sp, #68] +; NONEON-NOSVE-NEXT: fcsel s3, s19, s3, eq +; NONEON-NOSVE-NEXT: fcmp s18, s4 +; NONEON-NOSVE-NEXT: fcvt s19, h7 +; NONEON-NOSVE-NEXT: fcvt s23, h20 +; NONEON-NOSVE-NEXT: ldr h18, [sp, #48] +; NONEON-NOSVE-NEXT: str h2, [sp, #70] +; NONEON-NOSVE-NEXT: fcsel s4, s21, s6, eq +; NONEON-NOSVE-NEXT: fcmp s17, s16 +; NONEON-NOSVE-NEXT: 
fcvt s17, h18 +; NONEON-NOSVE-NEXT: fcvt s21, h24 +; NONEON-NOSVE-NEXT: ldr h16, [sp, #50] +; NONEON-NOSVE-NEXT: str h3, [sp, #72] +; NONEON-NOSVE-NEXT: fcsel s5, s22, s5, eq +; NONEON-NOSVE-NEXT: fcmp s23, s19 +; NONEON-NOSVE-NEXT: fcvt s22, h16 +; NONEON-NOSVE-NEXT: fcvt s23, h25 +; NONEON-NOSVE-NEXT: ldr h19, [sp, #52] +; NONEON-NOSVE-NEXT: str h4, [sp, #74] +; NONEON-NOSVE-NEXT: fcsel s6, s20, s7, eq +; NONEON-NOSVE-NEXT: fcmp s21, s17 +; NONEON-NOSVE-NEXT: fcvt s20, h19 +; NONEON-NOSVE-NEXT: fcvt s21, h26 +; NONEON-NOSVE-NEXT: ldr h17, [sp, #54] +; NONEON-NOSVE-NEXT: str h5, [sp, #76] +; NONEON-NOSVE-NEXT: fcsel s7, s24, s18, eq +; NONEON-NOSVE-NEXT: fcmp s23, s22 +; NONEON-NOSVE-NEXT: fcvt s22, h17 +; NONEON-NOSVE-NEXT: fcvt s23, h27 +; NONEON-NOSVE-NEXT: ldr h18, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h24, [sp, #40] +; NONEON-NOSVE-NEXT: str h6, [sp, #78] +; NONEON-NOSVE-NEXT: fcsel s16, s25, s16, eq +; NONEON-NOSVE-NEXT: fcmp s21, s20 +; NONEON-NOSVE-NEXT: fcvt s21, h18 +; NONEON-NOSVE-NEXT: fcvt s25, h24 +; NONEON-NOSVE-NEXT: ldr h20, [sp, #58] +; NONEON-NOSVE-NEXT: str h7, [sp, #80] +; NONEON-NOSVE-NEXT: fcsel s19, s26, s19, eq +; NONEON-NOSVE-NEXT: fcmp s23, s22 +; NONEON-NOSVE-NEXT: fcvt s23, h20 +; NONEON-NOSVE-NEXT: fcvt s26, h28 +; NONEON-NOSVE-NEXT: ldr h22, [sp, #60] +; NONEON-NOSVE-NEXT: str h16, [sp, #82] +; NONEON-NOSVE-NEXT: fcsel s17, s27, s17, eq +; NONEON-NOSVE-NEXT: fcmp s25, s21 +; NONEON-NOSVE-NEXT: fcvt s25, h22 +; NONEON-NOSVE-NEXT: fcvt s27, h29 +; NONEON-NOSVE-NEXT: ldr h21, [sp, #62] +; NONEON-NOSVE-NEXT: str h19, [sp, #84] +; NONEON-NOSVE-NEXT: fcsel s18, s24, s18, eq +; NONEON-NOSVE-NEXT: ldr h24, [sp, #46] +; NONEON-NOSVE-NEXT: fcmp s26, s23 +; NONEON-NOSVE-NEXT: fcvt s23, h21 +; NONEON-NOSVE-NEXT: str h17, [sp, #86] +; NONEON-NOSVE-NEXT: fcvt s26, h24 +; NONEON-NOSVE-NEXT: fcsel s20, s28, s20, eq +; NONEON-NOSVE-NEXT: fcmp s27, s25 +; NONEON-NOSVE-NEXT: ldr h25, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h27, [sp] +; NONEON-NOSVE-NEXT: str 
h18, [sp, #88] +; NONEON-NOSVE-NEXT: fcvt s17, h25 +; NONEON-NOSVE-NEXT: fcvt s18, h27 +; NONEON-NOSVE-NEXT: fcsel s7, s29, s22, eq +; NONEON-NOSVE-NEXT: fcmp s26, s23 +; NONEON-NOSVE-NEXT: str h20, [sp, #90] +; NONEON-NOSVE-NEXT: fcsel s16, s24, s21, eq +; NONEON-NOSVE-NEXT: str h7, [sp, #92] +; NONEON-NOSVE-NEXT: fcmp s18, s17 +; NONEON-NOSVE-NEXT: str h16, [sp, #94] +; NONEON-NOSVE-NEXT: fcsel s2, s27, s25, eq +; NONEON-NOSVE-NEXT: str h2, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %mask = fcmp oeq <16 x half> %op1, %op2 @@ -102,6 +356,26 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %m ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp d2, d0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp] +; NONEON-NOSVE-NEXT: str d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: sbfx w8, w9, #0, #1 +; NONEON-NOSVE-NEXT: fcsel s3, s2, s0, ne +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %sel = select <2 x i1> %mask, <2 x float> %op1, <2 x float> %op2 ret <2 x float> %sel } @@ -121,6 +395,40 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %m ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: str d2, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #12] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #44] +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w9, #0 +; NONEON-NOSVE-NEXT: sbfx w9, w11, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: fcsel s3, s2, s0, ne +; NONEON-NOSVE-NEXT: ldr s0, [sp, #40] +; NONEON-NOSVE-NEXT: cmp w9, #0 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, #0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #56] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #36] +; NONEON-NOSVE-NEXT: fcsel s3, s2, s0, ne +; 
NONEON-NOSVE-NEXT: ldr s0, [sp, #32] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %sel = select <4 x i1> %mask, <4 x float> %op1, <4 x float> %op2 ret <4 x float> %sel } @@ -137,6 +445,49 @@ define void @select_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: sel z1.s, p0, z2.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: stp q2, q3, [sp, #32] +; NONEON-NOSVE-NEXT: ldp s0, s2, [sp, #20] +; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #4] +; NONEON-NOSVE-NEXT: ldr s4, [sp, #12] +; NONEON-NOSVE-NEXT: ldr s17, [sp] +; NONEON-NOSVE-NEXT: ldp s6, s7, [sp, #36] +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, eq +; NONEON-NOSVE-NEXT: fcmp s3, s2 +; NONEON-NOSVE-NEXT: ldp s1, s5, [sp, #28] +; NONEON-NOSVE-NEXT: fcsel s2, s3, s2, eq +; NONEON-NOSVE-NEXT: ldp s16, s3, [sp, #44] +; NONEON-NOSVE-NEXT: fcmp s4, s1 +; NONEON-NOSVE-NEXT: fcsel s1, s4, s1, eq +; NONEON-NOSVE-NEXT: fcmp s5, s3 +; NONEON-NOSVE-NEXT: ldr s4, [sp, #52] +; NONEON-NOSVE-NEXT: fcsel s3, s5, s3, eq +; NONEON-NOSVE-NEXT: fcmp s6, s4 +; NONEON-NOSVE-NEXT: ldr s5, [sp, #56] +; NONEON-NOSVE-NEXT: stp s2, s1, [sp, #72] +; NONEON-NOSVE-NEXT: fcsel s4, s6, s4, eq +; NONEON-NOSVE-NEXT: fcmp s7, s5 +; NONEON-NOSVE-NEXT: ldr s6, [sp, #60] +; NONEON-NOSVE-NEXT: fcsel s5, s7, s5, eq +; NONEON-NOSVE-NEXT: fcmp s16, s6 +; NONEON-NOSVE-NEXT: ldr s7, [sp, #16] +; NONEON-NOSVE-NEXT: stp s3, s4, [sp, #80] +; NONEON-NOSVE-NEXT: fcsel s6, s16, s6, eq +; NONEON-NOSVE-NEXT: fcmp s17, s7 +; NONEON-NOSVE-NEXT: fcsel s3, s17, s7, eq +; NONEON-NOSVE-NEXT: stp s5, s6, [sp, #88] +; 
NONEON-NOSVE-NEXT: stp s3, s0, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %mask = fcmp oeq <8 x float> %op1, %op2 @@ -151,6 +502,17 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x i1> ; CHECK-NEXT: tst w0, #0x1 ; CHECK-NEXT: fcsel d0, d0, d1, ne ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: fcsel d0, d0, d1, ne +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %sel = select <1 x i1> %mask, <1 x double> %op1, <1 x double> %op2 ret <1 x double> %sel } @@ -170,6 +532,27 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1> ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: str d2, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #16] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: sbfx x8, x8, #0, #1 +; NONEON-NOSVE-NEXT: cmp x8, #0 +; NONEON-NOSVE-NEXT: sbfx x8, x9, #0, #1 +; NONEON-NOSVE-NEXT: fcsel d3, d2, d0, ne +; NONEON-NOSVE-NEXT: ldr d0, [sp, #32] +; NONEON-NOSVE-NEXT: cmp x8, #0 +; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %sel = select <2 x i1> %mask, <2 x double> %op1, <2 x double> %op2 
ret <2 x double> %sel } @@ -186,6 +569,33 @@ define void @select_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: sel z1.d, p0, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: stp q2, q3, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d5, d1, [sp] +; NONEON-NOSVE-NEXT: ldp d0, d3, [sp, #24] +; NONEON-NOSVE-NEXT: ldp d4, d2, [sp, #40] +; NONEON-NOSVE-NEXT: fcmp d1, d0 +; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, eq +; NONEON-NOSVE-NEXT: fcmp d3, d2 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #56] +; NONEON-NOSVE-NEXT: fcsel d2, d3, d2, eq +; NONEON-NOSVE-NEXT: fcmp d4, d1 +; NONEON-NOSVE-NEXT: ldr d3, [sp, #16] +; NONEON-NOSVE-NEXT: fcsel d1, d4, d1, eq +; NONEON-NOSVE-NEXT: fcmp d5, d3 +; NONEON-NOSVE-NEXT: fcsel d3, d5, d3, eq +; NONEON-NOSVE-NEXT: stp d2, d1, [sp, #80] +; NONEON-NOSVE-NEXT: stp d3, d0, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %mask = fcmp oeq <4 x double> %op1, %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll index ff38db8c10c04..3ba61c3335a64 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ 
-21,6 +22,25 @@ define <4 x i8> @insertelement_v4i8(<4 x i8> %op1) { ; CHECK-NEXT: mov z0.h, p0/m, w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: str d0, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: strh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: str w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %r = insertelement <4 x i8> %op1, i8 5, i64 3 ret <4 x i8> %r } @@ -38,6 +58,27 @@ define <8 x i8> @insertelement_v8i8(<8 x i8> %op1) { ; CHECK-NEXT: mov z0.b, p0/m, w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: str d0, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: strb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: str w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %r = insertelement <8 x i8> %op1, i8 5, i64 7 ret <8 x i8> %r } @@ -55,6 +96,29 @@ define <16 x i8> 
@insertelement_v16i8(<16 x i8> %op1) { ; CHECK-NEXT: mov z0.b, p0/m, w8 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %r = insertelement <16 x i8> %op1, i8 5, i64 15 ret <16 x i8> %r } @@ -72,6 +136,29 @@ define <32 x i8> @insertelement_v32i8(<32 x i8> %op1) { ; CHECK-NEXT: mov z1.b, p0/m, w8 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #48] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: str q1, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, 
#47] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %r = insertelement <32 x i8> %op1, i8 5, i64 31 ret <32 x i8> %r } @@ -90,6 +177,22 @@ define <2 x i16> @insertelement_v2i16(<2 x i16> %op1) { ; CHECK-NEXT: mov z0.s, p0/m, w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: str d0, [sp] +; NONEON-NOSVE-NEXT: str w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w9, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %r = insertelement <2 x i16> %op1, i16 5, i64 1 ret <2 x i16> %r } @@ -107,6 +210,25 @@ define <4 x i16> @insertelement_v4i16(<4 x i16> %op1) { ; CHECK-NEXT: mov z0.h, p0/m, w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: str d0, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: strh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: str w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %r = insertelement <4 x i16> %op1, i16 5, i64 3 ret <4 x i16> %r } @@ -124,6 +246,27 @@ define <8 x i16> 
@insertelement_v8i16(<8 x i16> %op1) { ; CHECK-NEXT: mov z0.h, p0/m, w8 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %r = insertelement <8 x i16> %op1, i16 5, i64 7 ret <8 x i16> %r } @@ -141,6 +284,27 @@ define <16 x i16> @insertelement_v16i16(<16 x i16> %op1) { ; CHECK-NEXT: mov z1.h, p0/m, w8 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #48] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: str q1, [sp, #16] +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %r = insertelement <16 x i16> %op1, i16 5, i64 15 
ret <16 x i16> %r } @@ -159,6 +323,22 @@ define <2 x i32> @insertelement_v2i32(<2 x i32> %op1) { ; CHECK-NEXT: mov z0.s, p0/m, w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: str d0, [sp] +; NONEON-NOSVE-NEXT: str w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w9, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %r = insertelement <2 x i32> %op1, i32 5, i64 1 ret <2 x i32> %r } @@ -176,6 +356,24 @@ define <4 x i32> @insertelement_v4i32(<4 x i32> %op1) { ; CHECK-NEXT: mov z0.s, p0/m, w8 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: str w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %r = insertelement <4 x i32> %op1, i32 5, i64 3 ret <4 x i32> %r } @@ -193,6 +391,24 @@ define <8 x i32> @insertelement_v8i32(ptr %a) { ; CHECK-NEXT: mov z1.s, p0/m, w8 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v8i32: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: ldp q0, q2, [x0] +; NONEON-NOSVE-NEXT: str w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %r = insertelement <8 x i32> %op1, i32 5, i64 7 ret <8 x i32> %r @@ -205,6 +421,16 @@ define <1 x i64> @insertelement_v1i64(<1 x i64> %op1) { ; CHECK-NEXT: mov z0.d, #5 // =0x5 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %r = insertelement <1 x i64> %op1, i64 5, i64 0 ret <1 x i64> %r } @@ -222,6 +448,22 @@ define <2 x i64> @insertelement_v2i64(<2 x i64> %op1) { ; CHECK-NEXT: mov z0.d, p0/m, x8 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: str x8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x9, [sp] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; 
NONEON-NOSVE-NEXT: ret %r = insertelement <2 x i64> %op1, i64 5, i64 1 ret <2 x i64> %r } @@ -239,6 +481,22 @@ define <4 x i64> @insertelement_v4i64(ptr %a) { ; CHECK-NEXT: mov z1.d, p0/m, x8 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: ldp q0, q2, [x0] +; NONEON-NOSVE-NEXT: str x8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp] +; NONEON-NOSVE-NEXT: ldr x9, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %r = insertelement <4 x i64> %op1, i64 5, i64 3 ret <4 x i64> %r @@ -257,6 +515,19 @@ define <2 x half> @insertelement_v2f16(<2 x half> %op1) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI14_0 +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [x8, :lo12:.LCPI14_0] +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: str h1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %r = insertelement <2 x half> %op1, half 5.0, i64 1 ret <2 x half> %r } @@ -274,6 +545,26 @@ define <4 x half> @insertelement_v4f16(<4 x half> %op1) { ; CHECK-NEXT: mov z0.h, p0/m, h1 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI15_0 +; NONEON-NOSVE-NEXT: str d0, [sp] +; NONEON-NOSVE-NEXT: ldr h1, [x8, :lo12:.LCPI15_0] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: str h1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: str w8, [sp, #16] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: str h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: str h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %r = insertelement <4 x half> %op1, half 5.0, i64 3 ret <4 x half> %r } @@ -291,6 +582,28 @@ define <8 x half> @insertelement_v8f16(<8 x half> %op1) { ; CHECK-NEXT: mov z0.h, p0/m, h1 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI16_0 +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldr h1, [x8, :lo12:.LCPI16_0] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: str h1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: str w8, 
[sp, #40] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %r = insertelement <8 x half> %op1, half 5.0, i64 7 ret <8 x half> %r } @@ -308,6 +621,28 @@ define <16 x half> @insertelement_v16f16(ptr %a) { ; CHECK-NEXT: mov z1.h, p0/m, h2 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI17_0 +; NONEON-NOSVE-NEXT: ldp q0, q2, [x0] +; NONEON-NOSVE-NEXT: ldr h1, [x8, :lo12:.LCPI17_0] +; NONEON-NOSVE-NEXT: str h1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: str h1, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #16] +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: str h1, [sp, #46] +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %r = insertelement <16 x half> %op1, half 5.0, i64 15 ret <16 x half> %r @@ -327,6 +662,22 @@ define <2 x float> @insertelement_v2f32(<2 x float> %op1) { ; CHECK-NEXT: mov z0.s, p0/m, s1 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: mov w8, #1084227584 // =0x40a00000 +; NONEON-NOSVE-NEXT: str d0, 
[sp] +; NONEON-NOSVE-NEXT: str w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr s1, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: stp s1, s0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %r = insertelement <2 x float> %op1, float 5.0, i64 1 ret <2 x float> %r } @@ -344,6 +695,24 @@ define <4 x float> @insertelement_v4f32(<4 x float> %op1) { ; CHECK-NEXT: mov z0.s, p0/m, s1 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: mov w8, #1084227584 // =0x40a00000 +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: str w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: stp s1, s0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %r = insertelement <4 x float> %op1, float 5.0, i64 3 ret <4 x float> %r } @@ -361,6 +730,25 @@ define <8 x float> @insertelement_v8f32(ptr %a) { ; CHECK-NEXT: mov z1.s, p0/m, s2 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: mov w8, #1084227584 // =0x40a00000 +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: str w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp] +; NONEON-NOSVE-NEXT: ldr s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: ldr 
s0, [sp, #16] +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: stp s1, s0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %r = insertelement <8 x float> %op1, float 5.0, i64 7 ret <8 x float> %r @@ -372,6 +760,16 @@ define <1 x double> @insertelement_v1f64(<1 x double> %op1) { ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d0, #5.00000000 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: mov x8, #4617315517961601024 // =0x4014000000000000 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %r = insertelement <1 x double> %op1, double 5.0, i64 0 ret <1 x double> %r } @@ -389,6 +787,22 @@ define <2 x double> @insertelement_v2f64(<2 x double> %op1) { ; CHECK-NEXT: mov z0.d, p0/m, d1 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: mov x8, #4617315517961601024 // =0x4014000000000000 +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: str x8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr d1, [sp] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %r = insertelement <2 x double> %op1, double 5.0, i64 1 ret <2 x double> %r } @@ -406,6 +820,23 @@ define <4 x double> @insertelement_v4f64(ptr %a) { ; CHECK-NEXT: mov z1.d, p0/m, d2 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret 
+; +; NONEON-NOSVE-LABEL: insertelement_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: mov x8, #4617315517961601024 // =0x4014000000000000 +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: str x8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp] +; NONEON-NOSVE-NEXT: ldr d1, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %r = insertelement <4 x double> %op1, double 5.0, i64 3 ret <4 x double> %r diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll index ee1706bc7c354..a2875ffef2e88 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll @@ -2,6 +2,7 @@ ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE ; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -16,6 +17,31 @@ define <4 x i8> @add_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: add z0.h, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w9, 
[sp, #14] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = add <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -28,6 +54,47 @@ define <8 x i8> @add_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: add z0.b, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: 
add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = add <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -40,6 +107,78 @@ define <16 x i8> @add_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: add z0.b, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = add <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -53,6 +192,147 @@ define void @add_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: add z1.b, z2.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; 
NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #47] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #62] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #60] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #59] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #58] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #56] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #54] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #52] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, 
#84] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #50] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #48] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: add w8, w9, w8 
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = add <32 x i8> %op1, %op2 @@ -68,6 +348,22 @@ define <2 x i16> @add_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-NEXT: add z0.s, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, 
w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = add <2 x i16> %op1, %op2 ret <2 x i16> %res } @@ -80,6 +376,31 @@ define <4 x i16> @add_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: add z0.h, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = add <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -92,6 +413,46 @@ define <8 x i16> @add_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: add z0.h, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = add <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -105,6 +466,83 @@ define void @add_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: add z1.h, z2.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh 
w9, [sp, #46] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, 
[sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = add <16 x i16> %op1, %op2 @@ -120,6 +558,22 @@ define <2 x i32> @add_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: add z0.s, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = add <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -132,6 +586,28 @@ define <4 x i32> @add_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: add z0.s, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: add w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = add <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -145,6 +621,47 @@ define void @add_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: add z1.s, z2.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; NONEON-NOSVE-NEXT: add w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #32] +; NONEON-NOSVE-NEXT: str w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: add w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: str w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: add w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: add 
w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: str w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = add <8 x i32> %op1, %op2 @@ -160,6 +677,18 @@ define <1 x i64> @add_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: add z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: add x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = add <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -172,6 +701,21 @@ define <2 x i64> @add_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: add z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: add x8, x10, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: add x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = add <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -185,6 +729,33 @@ define void @add_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: add z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #56] +; NONEON-NOSVE-NEXT: add x8, x10, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #88] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: add x8, x9, x8 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: str x8, [sp, #80] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: add x8, x10, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #72] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: add x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = add <4 x i64> %op1, %op2 @@ -213,6 +784,31 @@ define <4 x i8> @mul_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; SVE2-NEXT: mul z0.h, z0.h, z1.h ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, 
sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = mul <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -234,6 +830,47 @@ define <8 x i8> @mul_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; SVE2-NEXT: mul z0.b, z0.b, z1.b ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; 
NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = mul <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -255,6 +892,78 @@ define <16 x i8> @mul_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; SVE2-NEXT: mul z0.b, z0.b, z1.b ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] 
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = mul <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -279,6 +988,147 @@ define void @mul_v32i8(ptr %a, ptr %b) { ; SVE2-NEXT: mul z1.b, z2.b, z3.b ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #47] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #62] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #60] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #59] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, 
#42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #58] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #56] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #54] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #52] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #50] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #48] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrb w8, 
[sp, #30] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] 
+; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = mul <32 x i8> %op1, %op2 @@ -303,6 +1153,21 @@ define <2 x i16> @mul_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; SVE2-NEXT: mul z0.s, z0.s, z1.s ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: mul w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = mul <2 x i16> %op1, %op2 ret <2 x i16> %res } @@ -324,6 +1189,31 @@ define <4 x i16> @mul_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; SVE2-NEXT: mul z0.h, z0.h, z1.h ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; 
NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = mul <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -345,6 +1235,46 @@ define <8 x i16> @mul_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; SVE2-NEXT: mul z0.h, z0.h, z1.h ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, 
[sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = mul <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -369,6 +1299,83 @@ define void @mul_v16i16(ptr %a, ptr %b) { ; SVE2-NEXT: mul z1.h, z2.h, z3.h ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #46] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; 
NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = mul <16 x i16> %op1, %op2 @@ -393,6 +1400,21 @@ define <2 x i32> @mul_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; SVE2-NEXT: mul z0.s, z0.s, z1.s ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr 
w8, [sp, #20] +; NONEON-NOSVE-NEXT: mul w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = mul <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -414,6 +1436,26 @@ define <4 x i32> @mul_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; SVE2-NEXT: mul z0.s, z0.s, z1.s ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: mul w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: mul w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = mul <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -438,6 +1480,43 @@ define void @mul_v8i32(ptr %a, ptr %b) { ; SVE2-NEXT: mul z1.s, z2.s, z3.s ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; NONEON-NOSVE-NEXT: mul w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, 
w10, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: mul w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: mul w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: mul w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: mul w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = mul <8 x i32> %op1, %op2 @@ -462,6 +1541,18 @@ define <1 x i64> @mul_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; SVE2-NEXT: mul z0.d, z0.d, z1.d ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: mul x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = mul <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -483,6 +1574,20 @@ define <2 x i64> @mul_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; SVE2-NEXT: mul z0.d, z0.d, z1.d ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: mul x11, x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: mul x8, x9, x8 +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = mul <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -507,6 +1612,31 @@ define void @mul_v4i64(ptr %a, ptr %b) { ; SVE2-NEXT: mul z1.d, z2.d, z3.d ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #56] +; NONEON-NOSVE-NEXT: mul x11, x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: mul x8, x9, x8 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: mul x11, x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: mul x8, x9, x8 +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = mul <4 x i64> %op1, %op2 @@ -526,6 +1656,31 @@ define <4 x i8> @sub_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: sub z0.h, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; 
NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sub <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -538,6 +1693,47 @@ define <8 x i8> @sub_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: sub z0.b, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, 
[sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sub <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -550,6 +1746,78 @@ define <16 x i8> @sub_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: sub z0.b, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; 
NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = sub <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -563,6 +1831,147 @@ define void @sub_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: sub z1.b, z2.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; 
NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #47] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #62] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #60] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #59] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #58] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #56] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #54] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #52] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #50] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #48] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] 
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = sub <32 x i8> %op1, %op2 @@ -578,6 +1987,22 @@ define <2 x i16> @sub_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-NEXT: sub z0.s, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: sub w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, 
[sp, #28] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sub <2 x i16> %op1, %op2 ret <2 x i16> %res } @@ -590,6 +2015,31 @@ define <4 x i16> @sub_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: sub z0.h, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sub <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -602,6 +2052,46 @@ define <8 x i16> @sub_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: sub z0.h, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = sub <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -615,6 +2105,83 @@ define void @sub_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: sub z1.h, z2.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh 
w9, [sp, #46] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, 
[sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = sub <16 x i16> %op1, %op2 @@ -630,6 +2197,22 @@ define <2 x i32> @sub_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: sub z0.s, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: sub w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sub <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -642,6 +2225,28 @@ define <4 x i32> @sub_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: sub z0.s, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: sub w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: sub w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = sub <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -655,6 +2260,47 @@ define void @sub_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: sub z1.s, z2.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; NONEON-NOSVE-NEXT: sub w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #32] +; NONEON-NOSVE-NEXT: str w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: sub w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: str w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: sub w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: sub 
w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: str w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: sub w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = sub <8 x i32> %op1, %op2 @@ -670,6 +2316,18 @@ define <1 x i64> @sub_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: sub z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: sub x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = sub <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -682,6 +2340,21 @@ define <2 x i64> @sub_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: sub z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: sub x8, x10, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: sub x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = sub <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -695,6 +2368,33 @@ define void @sub_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: sub z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #56] +; NONEON-NOSVE-NEXT: sub x8, x10, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #88] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: sub x8, x9, x8 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: str x8, [sp, #80] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: sub x8, x10, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #72] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: sub x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = sub <4 x i64> %op1, %op2 @@ -715,6 +2415,30 @@ define <4 x i8> @abs_v4i8(<4 x i8> %op1) { ; CHECK-NEXT: abs z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, 
#-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #6] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #4] +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #2] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: cmp w9, #0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #14] +; NONEON-NOSVE-NEXT: cneg w8, w9, mi +; NONEON-NOSVE-NEXT: cmp w10, #0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #12] +; NONEON-NOSVE-NEXT: cneg w8, w10, mi +; NONEON-NOSVE-NEXT: cmp w11, #0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #10] +; NONEON-NOSVE-NEXT: cneg w8, w11, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %op1, i1 false) ret <4 x i8> %res } @@ -727,6 +2451,46 @@ define <8 x i8> @abs_v8i8(<8 x i8> %op1) { ; CHECK-NEXT: abs z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #7] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #15] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #5] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #3] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #11] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #1] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #9] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %op1, i1 false) ret <8 x i8> %res } @@ -739,6 +2503,78 @@ define <16 x i8> @abs_v16i8(<16 x i8> %op1) { ; CHECK-NEXT: abs z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #13] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #11] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #9] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #7] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #5] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #3] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi 
+; NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #1] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %op1, i1 false) ret <16 x i8> %res } @@ -752,6 +2588,144 @@ define void @abs_v32i8(ptr %a) { ; CHECK-NEXT: abs z1.b, p0/m, z1.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25] +; 
NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #13] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #12] 
+; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #11] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #9] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #7] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #5] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #3] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #1] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; 
NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %res = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %op1, i1 false) store <32 x i8> %res, ptr %a @@ -767,6 +2741,21 @@ define <2 x i16> @abs_v2i16(<2 x i16> %op1) { ; CHECK-NEXT: abs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #4] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: cmp w9, #0 +; NONEON-NOSVE-NEXT: cneg w9, w9, mi +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %op1, i1 false) ret <2 x i16> %res } @@ -779,6 +2768,30 @@ define <4 x i16> @abs_v4i16(<4 x i16> %op1) { ; CHECK-NEXT: abs z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %op1, i1 false) ret <4 x i16> %res } @@ -791,6 +2804,46 @@ define <8 x i16> @abs_v8i16(<8 x i16> %op1) { ; CHECK-NEXT: abs z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %op1, i1 false) ret <8 x i16> %res } @@ -804,6 +2857,80 @@ define void @abs_v16i16(ptr %a) { ; CHECK-NEXT: abs z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, 
w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %op1, i1 false) store <16 x i16> %res, ptr %a @@ -818,6 +2945,21 @@ define <2 x i32> @abs_v2i32(<2 x i32> %op1) { ; CHECK-NEXT: abs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w9, w8, mi +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %op1, i1 false) ret <2 x i32> %res } @@ -830,6 +2972,28 @@ define <4 x i32> @abs_v4i32(<4 x i32> %op1) { ; CHECK-NEXT: abs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w9, w8, mi +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w9, w8, mi +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %op1, i1 false) ret <4 x i32> %res } @@ -843,6 +3007,44 @@ define void @abs_v8i32(ptr %a) { ; CHECK-NEXT: abs z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w9, w8, mi +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w9, w8, mi +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w9, w8, mi +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w9, w8, mi +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %op1, i1 false) store <8 x i32> %res, ptr %a @@ -857,6 +3059,18 @@ define <1 x i64> @abs_v1i64(<1 x i64> %op1) { ; CHECK-NEXT: abs z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: cmp x8, #0 +; NONEON-NOSVE-NEXT: cneg x8, x8, mi +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.abs.v1i64(<1 x i64> %op1, i1 
false) ret <1 x i64> %res } @@ -869,6 +3083,21 @@ define <2 x i64> @abs_v2i64(<2 x i64> %op1) { ; CHECK-NEXT: abs z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp x8, #0 +; NONEON-NOSVE-NEXT: cneg x9, x8, mi +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: cmp x8, #0 +; NONEON-NOSVE-NEXT: cneg x8, x8, mi +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %op1, i1 false) ret <2 x i64> %res } @@ -882,6 +3111,30 @@ define void @abs_v4i64(ptr %a) { ; CHECK-NEXT: abs z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x8, #0 +; NONEON-NOSVE-NEXT: cneg x9, x8, mi +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp x8, #0 +; NONEON-NOSVE-NEXT: cneg x8, x8, mi +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp x8, #0 +; NONEON-NOSVE-NEXT: cneg x9, x8, mi +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: cmp x8, #0 +; NONEON-NOSVE-NEXT: cneg x8, x8, mi +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %res = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %op1, i1 false) store <4 x i64> %res, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll index c2f3bbfb51dd5..0b4316686fff6 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -18,6 +19,55 @@ define <8 x i8> @icmp_eq_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: 
ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %cmp = icmp eq <8 x i8> %op1, %op2 %sext = sext <8 x i1> %cmp to <8 x i8> ret <8 x i8> %sext @@ -33,6 +83,94 @@ define <16 x i8> @icmp_eq_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v16i8: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: csetm w8, eq +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %cmp = icmp eq <16 x i8> %op1, %op2 %sext = sext <16 x i1> %cmp to <16 x i8> ret <16 x i8> %sext @@ -50,6 +188,179 @@ define void @icmp_eq_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #47] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: 
csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #62] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #45] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #60] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #43] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #59] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #42] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #58] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #39] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #54] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #37] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, 
#52] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #35] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #50] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #33] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #48] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; 
NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp 
q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %cmp = icmp eq <32 x i8> %op1, %op2 @@ -68,6 +379,35 @@ define <4 x i16> @icmp_eq_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %cmp = icmp eq <4 x i16> %op1, %op2 %sext = sext <4 x i1> %cmp to <4 x i16> ret <4 x i16> %sext @@ -83,6 +423,54 @@ define <8 x i16> @icmp_eq_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %cmp = icmp eq <8 x i16> %op1, %op2 %sext = sext <8 x i1> %cmp to <8 x i16> ret <8 x i16> %sext @@ -100,6 +488,99 @@ define void @icmp_eq_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v16i16: +; NONEON-NOSVE: // 
%bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #46] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #44] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #40] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #38] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #36] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #34] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh 
w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %cmp = icmp eq <16 x i16> %op1, %op2 @@ -118,6 +599,23 @@ define <2 x i32> @icmp_eq_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] 
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: csetm w10, eq +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %cmp = icmp eq <2 x i32> %op1, %op2 %sext = sext <2 x i1> %cmp to <2 x i32> ret <2 x i32> %sext @@ -133,6 +631,30 @@ define <4 x i32> @icmp_eq_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: csetm w10, eq +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: csetm w10, eq +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %cmp = icmp eq <4 x i32> %op1, %op2 %sext = sext <4 x i1> %cmp to <4 x i32> ret <4 x i32> %sext @@ -150,6 +672,51 @@ define void @icmp_eq_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, 
q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: csetm w10, eq +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #32] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: csetm w10, eq +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: csetm w10, eq +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: csetm w10, eq +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %cmp = icmp eq <8 x i32> %op1, %op2 @@ -168,6 +735,19 @@ define <1 x i64> @icmp_eq_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 
+; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csetm x8, eq +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %cmp = icmp eq <1 x i64> %op1, %op2 %sext = sext <1 x i1> %cmp to <1 x i64> ret <1 x i64> %sext @@ -183,6 +763,22 @@ define <2 x i64> @icmp_eq_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: csetm x10, eq +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csetm x8, eq +; NONEON-NOSVE-NEXT: stp x8, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %cmp = icmp eq <2 x i64> %op1, %op2 %sext = sext <2 x i1> %cmp to <2 x i64> ret <2 x i64> %sext @@ -200,6 +796,35 @@ define void @icmp_eq_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: csetm x10, eq +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csetm x8, eq +; NONEON-NOSVE-NEXT: stp x8, x10, 
[sp, #80] +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: csetm x10, eq +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csetm x8, eq +; NONEON-NOSVE-NEXT: stp x8, x10, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %cmp = icmp eq <4 x i64> %op1, %op2 @@ -224,6 +849,179 @@ define void @icmp_ne_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_ne_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #47] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #62] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #45] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #60] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #43] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #59] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #42] +; NONEON-NOSVE-NEXT: csetm w8, ne +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #58] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #39] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #54] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #37] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #52] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #35] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #50] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #33] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #48] +; 
NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: 
csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %cmp = icmp ne <32 x i8> %op1, %op2 @@ -246,6 +1044,57 @@ define void @icmp_sge_v8i16(ptr %a, ptr %b) { ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_sge_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #6] +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #4] +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #2] +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp] +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %op2 = load <8 x i16>, ptr %b %cmp = icmp sge <8 x i16> %op1, %op2 @@ -270,6 +1119,99 @@ define void @icmp_sgt_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; 
NONEON-NOSVE-LABEL: icmp_sgt_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #46] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #44] +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #60] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #42] +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #58] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #40] +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #38] +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #54] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #36] +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #52] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #34] +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #50] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #32] +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #48] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh 
w9, [sp, #12] +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #6] +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #4] +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #2] +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp] +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %cmp = icmp sgt <16 x i16> %op1, %op2 @@ -292,6 +1234,33 @@ define void @icmp_sle_v4i32(ptr %a, ptr %b) { ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_sle_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: csetm w10, le +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: csetm w10, le +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i32>, ptr %a %op2 = load <4 x i32>, ptr %b %cmp = icmp sle <4 x i32> %op1, %op2 @@ -316,6 +1285,51 @@ define void @icmp_slt_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_slt_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: csetm w10, lt +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #32] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: csetm w10, lt +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #80] +; 
NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: csetm w10, lt +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: csetm w10, lt +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %cmp = icmp slt <8 x i32> %op1, %op2 @@ -338,6 +1352,25 @@ define void @icmp_uge_v2i64(ptr %a, ptr %b) { ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_uge_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: csetm x10, hs +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csetm x8, hs +; NONEON-NOSVE-NEXT: stp x8, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x i64>, ptr %a %op2 = load <2 x i64>, ptr %b %cmp = icmp uge <2 x i64> %op1, %op2 @@ -360,6 +1393,25 @@ define void @icmp_ugt_v2i64(ptr %a, ptr %b) { ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_ugt_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: csetm x10, hi +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csetm x8, hi +; NONEON-NOSVE-NEXT: stp x8, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x i64>, ptr %a %op2 = load <2 x i64>, ptr %b %cmp = icmp ugt <2 x i64> %op1, %op2 @@ -382,6 +1434,25 @@ define void @icmp_ule_v2i64(ptr %a, ptr %b) { ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_ule_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: csetm x10, ls +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csetm x8, ls +; NONEON-NOSVE-NEXT: stp x8, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x i64>, ptr %a %op2 = load <2 x i64>, ptr %b %cmp = icmp ule <2 x i64> %op1, %op2 @@ -404,6 +1475,25 @@ define void @icmp_ult_v2i64(ptr %a, ptr %b) { ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_ult_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: csetm x10, lo +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csetm x8, lo +; NONEON-NOSVE-NEXT: stp x8, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x i64>, ptr %a %op2 = load <2 x i64>, ptr %b %cmp = icmp ult <2 x i64> %op1, %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll index e6fd775b4cfb9..e09b1613a54af 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll @@ -2,6 +2,7 @@ ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE ; RUN: llc 
-mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -24,6 +25,31 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #12] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #10] +; NONEON-NOSVE-NEXT: ldrsb w12, [sp, #8] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #20] +; NONEON-NOSVE-NEXT: sdiv w9, w10, w9 +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #18] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: sdiv w10, w11, w10 +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #16] +; NONEON-NOSVE-NEXT: strh w9, [sp, #28] +; NONEON-NOSVE-NEXT: sdiv w11, w12, w11 +; NONEON-NOSVE-NEXT: strh w10, [sp, #26] +; NONEON-NOSVE-NEXT: strh w11, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sdiv <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -51,6 +77,47 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: 
sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sdiv <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -98,6 +165,78 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #3] +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = sdiv <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -178,6 +317,147 @@ define void @sdiv_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: splice z3.b, p0, z3.b, z1.b ; CHECK-NEXT: stp q3, q2, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #47] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #62] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #61] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #60] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #59] 
+; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #58] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #57] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #56] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #55] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #54] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #53] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #52] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #51] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #50] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #49] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #48] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; 
NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #2] +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = sdiv <32 x i8> %op1, %op2 @@ -196,6 +476,22 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #8] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv w9, w10, w9 +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sdiv <2 x i16> %op1, %op2 ret <2 x i16> %res } @@ -212,6 +508,31 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: 
ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sdiv <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -238,6 +559,46 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = sdiv <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -278,6 +639,83 @@ define void @sdiv_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: splice z3.h, p0, z3.h, z1.h ; CHECK-NEXT: stp q3, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, 
#62] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #46] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #60] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #58] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #56] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #54] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #52] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #50] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #48] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, 
#22] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = sdiv <16 x i16> %op1, %op2 @@ -294,6 +732,21 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: sdiv w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sdiv <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -307,6 +760,26 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: sdiv w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: sdiv w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = sdiv <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -322,6 +795,43 @@ define void @sdiv_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: sdiv z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; NONEON-NOSVE-NEXT: sdiv w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: sdiv w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: sdiv w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; 
NONEON-NOSVE-NEXT: sdiv w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = sdiv <8 x i32> %op1, %op2 @@ -338,6 +848,18 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: sdiv x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = sdiv <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -351,6 +873,20 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: sdiv x11, x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv x8, x9, x8 +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = sdiv <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -366,6 +902,31 @@ define void @sdiv_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: sdiv z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #56] +; NONEON-NOSVE-NEXT: sdiv x11, x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: sdiv x8, x9, x8 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: sdiv x11, x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv x8, x9, x8 +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = sdiv <4 x i64> %op1, %op2 @@ -391,6 +952,31 @@ define <4 x i8> @udiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp 
d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #8] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #20] +; NONEON-NOSVE-NEXT: udiv w9, w10, w9 +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #18] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: udiv w10, w11, w10 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #16] +; NONEON-NOSVE-NEXT: strh w9, [sp, #28] +; NONEON-NOSVE-NEXT: udiv w11, w12, w11 +; NONEON-NOSVE-NEXT: strh w10, [sp, #26] +; NONEON-NOSVE-NEXT: strh w11, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = udiv <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -418,6 +1004,47 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb 
w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = udiv <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -465,6 +1092,78 @@ define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb 
w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = udiv <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -545,6 +1244,147 @@ define void @udiv_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: splice z3.b, p0, z3.b, z1.b ; CHECK-NEXT: stp q3, q2, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; 
NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #47] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #62] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #60] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #59] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #58] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #56] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #54] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] +; NONEON-NOSVE-NEXT: 
strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #52] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #50] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #48] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; 
NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = udiv <32 x i8> %op1, %op2 @@ -563,6 +1403,22 @@ define <2 x i16> @udiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; 
NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #8] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #16] +; NONEON-NOSVE-NEXT: udiv w9, w10, w9 +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = udiv <2 x i16> %op1, %op2 ret <2 x i16> %res } @@ -579,6 +1435,31 @@ define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = udiv <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -605,6 +1486,46 @@ define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = udiv <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -645,6 +1566,83 @@ define void @udiv_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: splice z3.h, p0, z3.h, z1.h ; CHECK-NEXT: stp q3, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62] +; 
NONEON-NOSVE-NEXT: ldrh w9, [sp, #46] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: udiv w8, 
w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = udiv <16 x i16> %op1, %op2 @@ -661,6 +1659,21 @@ define <2 x i32> @udiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: udiv w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = udiv <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -674,6 +1687,26 @@ define <4 x i32> @udiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: udiv w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: udiv w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = udiv <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -689,6 +1722,43 @@ define void @udiv_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: udiv z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; NONEON-NOSVE-NEXT: udiv w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: udiv w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: udiv w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; 
NONEON-NOSVE-NEXT: udiv w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = udiv <8 x i32> %op1, %op2 @@ -705,6 +1775,18 @@ define <1 x i64> @udiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: udiv x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = udiv <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -718,6 +1800,20 @@ define <2 x i64> @udiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: udiv x11, x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv x8, x9, x8 +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = udiv <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -733,6 +1829,31 @@ define void @udiv_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: udiv z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #56] +; NONEON-NOSVE-NEXT: udiv x11, x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: udiv x8, x9, x8 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: udiv x11, x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv x8, x9, x8 +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = udiv <4 x i64> %op1, %op2 @@ -778,6 +1899,70 @@ define void @udiv_constantsplat_v8i32(ptr %a) { ; SVE2-NEXT: lsr z0.s, z0.s, #6 ; SVE2-NEXT: stp q1, q0, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_constantsplat_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: mov w8, #8969 // =0x2309 +; NONEON-NOSVE-NEXT: movk w8, #22765, lsl 
#16 +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #28] +; NONEON-NOSVE-NEXT: umull x10, w9, w8 +; NONEON-NOSVE-NEXT: lsr x10, x10, #32 +; NONEON-NOSVE-NEXT: sub w9, w9, w10 +; NONEON-NOSVE-NEXT: add w9, w10, w9, lsr #1 +; NONEON-NOSVE-NEXT: lsr w11, w9, #6 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #24] +; NONEON-NOSVE-NEXT: umull x10, w9, w8 +; NONEON-NOSVE-NEXT: lsr x10, x10, #32 +; NONEON-NOSVE-NEXT: sub w9, w9, w10 +; NONEON-NOSVE-NEXT: add w9, w10, w9, lsr #1 +; NONEON-NOSVE-NEXT: lsr w9, w9, #6 +; NONEON-NOSVE-NEXT: stp w9, w11, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #20] +; NONEON-NOSVE-NEXT: umull x10, w9, w8 +; NONEON-NOSVE-NEXT: lsr x10, x10, #32 +; NONEON-NOSVE-NEXT: sub w9, w9, w10 +; NONEON-NOSVE-NEXT: add w9, w10, w9, lsr #1 +; NONEON-NOSVE-NEXT: lsr w11, w9, #6 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] +; NONEON-NOSVE-NEXT: umull x10, w9, w8 +; NONEON-NOSVE-NEXT: lsr x10, x10, #32 +; NONEON-NOSVE-NEXT: sub w9, w9, w10 +; NONEON-NOSVE-NEXT: add w9, w10, w9, lsr #1 +; NONEON-NOSVE-NEXT: lsr w9, w9, #6 +; NONEON-NOSVE-NEXT: stp w9, w11, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] +; NONEON-NOSVE-NEXT: umull x10, w9, w8 +; NONEON-NOSVE-NEXT: lsr x10, x10, #32 +; NONEON-NOSVE-NEXT: sub w9, w9, w10 +; NONEON-NOSVE-NEXT: add w9, w10, w9, lsr #1 +; NONEON-NOSVE-NEXT: lsr w11, w9, #6 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] +; NONEON-NOSVE-NEXT: umull x10, w9, w8 +; NONEON-NOSVE-NEXT: lsr x10, x10, #32 +; NONEON-NOSVE-NEXT: sub w9, w9, w10 +; NONEON-NOSVE-NEXT: add w9, w10, w9, lsr #1 +; NONEON-NOSVE-NEXT: lsr w9, w9, #6 +; NONEON-NOSVE-NEXT: stp w9, w11, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #4] +; NONEON-NOSVE-NEXT: umull x10, w9, w8 +; NONEON-NOSVE-NEXT: lsr x10, x10, #32 +; NONEON-NOSVE-NEXT: sub w9, w9, w10 +; NONEON-NOSVE-NEXT: add w9, w10, w9, lsr #1 +; NONEON-NOSVE-NEXT: lsr w11, w9, #6 +; NONEON-NOSVE-NEXT: ldr w9, [sp] +; NONEON-NOSVE-NEXT: umull x8, w9, w8 +; 
NONEON-NOSVE-NEXT: lsr x8, x8, #32 +; NONEON-NOSVE-NEXT: sub w9, w9, w8 +; NONEON-NOSVE-NEXT: add w8, w8, w9, lsr #1 +; NONEON-NOSVE-NEXT: lsr w8, w8, #6 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = udiv <8 x i32> %op1, store <8 x i32> %res, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll index e40668a8696ee..2c2b79121ef82 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll @@ -2,6 +2,7 @@ ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE ; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -26,6 +27,54 @@ define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) { ; CHECK-NEXT: asr z0.s, z0.s, #31 ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v8i1_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: strh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: strh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; 
NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #34] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #38] +; NONEON-NOSVE-NEXT: sbfx w12, w12, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: stp w10, w8, [sp, #72] +; NONEON-NOSVE-NEXT: sbfx w8, w14, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w10, w15, #0, #1 +; NONEON-NOSVE-NEXT: stp w8, w12, [sp, #64] +; NONEON-NOSVE-NEXT: sbfx w12, w13, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w8, w11, #0, #1 +; NONEON-NOSVE-NEXT: stp w12, w10, [sp, #56] +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %b = sext <8 x i1> %a to <8 x i32> store <8 x i32> %b, ptr %out ret void @@ -52,6 +101,26 @@ define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) { ; CHECK-NEXT: asr z0.d, z0.d, #61 ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v4i3_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldp w11, w10, [sp, #16] +; NONEON-NOSVE-NEXT: sbfx x8, x8, #0, #3 +; NONEON-NOSVE-NEXT: sbfx x9, x9, #0, #3 +; NONEON-NOSVE-NEXT: sbfx x10, x10, #0, #3 +; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #48] +; NONEON-NOSVE-NEXT: sbfx x8, x11, #0, #3 +; NONEON-NOSVE-NEXT: stp x8, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %b = sext <4 x i3> %a to <4 x i64> store <4 x i64> %b, ptr %out ret void @@ -70,6 +139,49 @@ define void @sext_v16i8_v16i16(<16 x i8> %a, ptr %out) { ; CHECK-NEXT: sunpklo z0.h, z0.b ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v16i8_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31] +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30] +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29] +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28] +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27] +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26] +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25] +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24] +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] +; NONEON-NOSVE-NEXT: strh 
w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %b = sext <16 x i8> %a to <16 x i16> store <16 x i16>%b, ptr %out ret void @@ -91,6 +203,210 @@ define void @sext_v32i8_v32i16(ptr %in, ptr %out) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v32i8_v32i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #272 +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #176] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #192] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #208] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #224] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #240] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #256] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 272 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 +; NONEON-NOSVE-NEXT: .cfi_offset w28, -80 +; NONEON-NOSVE-NEXT: .cfi_offset w30, -88 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -96 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; 
NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w29, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w27, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w25, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w21, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w19, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w6, [sp, #38] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: ldrb w28, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22] +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: add w8, w29, w29 +; NONEON-NOSVE-NEXT: ldrb w4, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: add w8, w27, w27 +; NONEON-NOSVE-NEXT: ldrb w26, [sp, #31] +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: add w8, w25, w25 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: add w8, w23, w23 +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: add w8, w21, w21 +; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29] +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: add w8, w19, w19 +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23] +; NONEON-NOSVE-NEXT: strb w9, [sp, #59] +; NONEON-NOSVE-NEXT: add w9, w28, w28 +; NONEON-NOSVE-NEXT: add w18, w16, w16 +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: add w8, w6, w6 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w22, [sp, #27] +; NONEON-NOSVE-NEXT: strb w9, [sp, #57] +; NONEON-NOSVE-NEXT: add w9, w26, w26 +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: add w8, w4, w4 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w20, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21] +; NONEON-NOSVE-NEXT: strb w9, [sp, #55] +; NONEON-NOSVE-NEXT: add 
w9, w24, w24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: add w8, w2, w2 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44] +; NONEON-NOSVE-NEXT: add w17, w17, w17 +; NONEON-NOSVE-NEXT: strb w9, [sp, #53] +; NONEON-NOSVE-NEXT: add w9, w22, w22 +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: add w8, w16, w16 +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42] +; NONEON-NOSVE-NEXT: strb w17, [sp, #63] +; NONEON-NOSVE-NEXT: add w17, w30, w30 +; NONEON-NOSVE-NEXT: strb w9, [sp, #51] +; NONEON-NOSVE-NEXT: add w9, w20, w20 +; NONEON-NOSVE-NEXT: ldrb w7, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: add w8, w14, w14 +; NONEON-NOSVE-NEXT: ldrb w5, [sp, #37] +; NONEON-NOSVE-NEXT: strb w18, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w0, [sp, #33] +; NONEON-NOSVE-NEXT: strb w17, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45] +; NONEON-NOSVE-NEXT: strb w9, [sp, #49] +; NONEON-NOSVE-NEXT: add w9, w7, w7 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: add w8, w12, w12 +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: add w8, w10, w10 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #79] +; NONEON-NOSVE-NEXT: add w9, w5, w5 +; NONEON-NOSVE-NEXT: strb w9, [sp, #77] +; NONEON-NOSVE-NEXT: add w9, w3, w3 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #95] +; NONEON-NOSVE-NEXT: strb w9, [sp, #75] +; NONEON-NOSVE-NEXT: add w9, w0, w0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #142] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #94] +; NONEON-NOSVE-NEXT: strb w9, [sp, #73] +; NONEON-NOSVE-NEXT: add w9, w15, w15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #140] +; 
NONEON-NOSVE-NEXT: ldrsb w8, [sp, #93] +; NONEON-NOSVE-NEXT: strb w9, [sp, #71] +; NONEON-NOSVE-NEXT: add w9, w13, w13 +; NONEON-NOSVE-NEXT: strh w8, [sp, #138] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #92] +; NONEON-NOSVE-NEXT: strb w9, [sp, #69] +; NONEON-NOSVE-NEXT: add w9, w11, w11 +; NONEON-NOSVE-NEXT: strh w8, [sp, #136] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #91] +; NONEON-NOSVE-NEXT: strb w9, [sp, #67] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #134] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #90] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #256] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #132] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #89] +; NONEON-NOSVE-NEXT: strb w9, [sp, #65] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #240] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #130] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: strh w8, [sp, #128] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #224] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #126] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #86] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #208] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #124] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #192] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #122] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #176] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #120] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #83] +; NONEON-NOSVE-NEXT: strh w8, [sp, #118] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #82] +; NONEON-NOSVE-NEXT: strh w8, [sp, #116] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #81] +; NONEON-NOSVE-NEXT: strh w8, [sp, #114] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #80] +; 
NONEON-NOSVE-NEXT: strh w8, [sp, #112] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #111] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #112] +; NONEON-NOSVE-NEXT: strh w8, [sp, #174] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #110] +; NONEON-NOSVE-NEXT: strh w8, [sp, #172] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #109] +; NONEON-NOSVE-NEXT: strh w8, [sp, #170] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #108] +; NONEON-NOSVE-NEXT: strh w8, [sp, #168] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #107] +; NONEON-NOSVE-NEXT: strh w8, [sp, #166] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #106] +; NONEON-NOSVE-NEXT: strh w8, [sp, #164] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #105] +; NONEON-NOSVE-NEXT: strh w8, [sp, #162] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #104] +; NONEON-NOSVE-NEXT: strh w8, [sp, #160] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #103] +; NONEON-NOSVE-NEXT: strh w8, [sp, #158] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #102] +; NONEON-NOSVE-NEXT: strh w8, [sp, #156] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #101] +; NONEON-NOSVE-NEXT: strh w8, [sp, #154] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #100] +; NONEON-NOSVE-NEXT: strh w8, [sp, #152] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #99] +; NONEON-NOSVE-NEXT: strh w8, [sp, #150] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #98] +; NONEON-NOSVE-NEXT: strh w8, [sp, #148] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #97] +; NONEON-NOSVE-NEXT: strh w8, [sp, #146] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #96] +; NONEON-NOSVE-NEXT: strh w8, [sp, #144] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #144] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #272 +; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a %c = sext <32 x i8> %b to <32 x i16> @@ -112,6 +428,46 @@ define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) { ; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v8i8_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; 
NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #11] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #9] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #15] +; NONEON-NOSVE-NEXT: strh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #13] +; NONEON-NOSVE-NEXT: strh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #44] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #40] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #36] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %b = sext <8 x i8> %a to <8 x i32> store <8 x i32>%b, ptr %out ret void @@ -133,6 +489,79 @@ define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) { ; CHECK-NEXT: stp q2, q1, [x0] ; CHECK-NEXT: stp q3, q0, [x0, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v16i8_v16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-160]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27] +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26] +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25] +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24] +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31] +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30] +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29] +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28] +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #94] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #90] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #88] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #86] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #84] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: 
stp w8, w9, [sp, #136] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #82] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #80] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #78] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #74] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #72] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #70] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #68] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #66] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #64] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q2, q3, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: ret %b = sext <16 x i8> %a to <16 x i32> store <16 x i32> %b, ptr %out ret void @@ -167,6 +596,284 @@ define void @sext_v32i8_v32i32(ptr %in, ptr %out) { ; CHECK-NEXT: stp q6, q0, [x1, #96] ; CHECK-NEXT: stp q7, q1, [x1, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v32i8_v32i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #464 +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #368] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #384] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #400] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #416] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #432] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #448] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 464 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; 
NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 +; NONEON-NOSVE-NEXT: .cfi_offset w28, -80 +; NONEON-NOSVE-NEXT: .cfi_offset w30, -88 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -96 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w29, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w27, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w25, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w21, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w19, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w6, [sp, #38] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: ldrb w28, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22] +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: add w8, w29, w29 +; NONEON-NOSVE-NEXT: ldrb w4, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: add w8, w27, w27 +; NONEON-NOSVE-NEXT: ldrb w26, [sp, #31] +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: add w8, w25, w25 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: add w8, w23, w23 +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: add w8, w21, w21 +; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29] +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: add w8, w19, w19 +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23] +; NONEON-NOSVE-NEXT: strb w9, [sp, #59] +; NONEON-NOSVE-NEXT: add w9, w28, w28 +; NONEON-NOSVE-NEXT: add w18, w16, w16 +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: add w8, w6, w6 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w22, [sp, #27] +; 
NONEON-NOSVE-NEXT: strb w9, [sp, #57] +; NONEON-NOSVE-NEXT: add w9, w26, w26 +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: add w8, w4, w4 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w20, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21] +; NONEON-NOSVE-NEXT: strb w9, [sp, #55] +; NONEON-NOSVE-NEXT: add w9, w24, w24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: add w8, w2, w2 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44] +; NONEON-NOSVE-NEXT: add w17, w17, w17 +; NONEON-NOSVE-NEXT: strb w9, [sp, #53] +; NONEON-NOSVE-NEXT: add w9, w22, w22 +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: add w8, w16, w16 +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42] +; NONEON-NOSVE-NEXT: strb w17, [sp, #63] +; NONEON-NOSVE-NEXT: add w17, w30, w30 +; NONEON-NOSVE-NEXT: strb w9, [sp, #51] +; NONEON-NOSVE-NEXT: add w9, w20, w20 +; NONEON-NOSVE-NEXT: ldrb w7, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: add w8, w14, w14 +; NONEON-NOSVE-NEXT: ldrb w5, [sp, #37] +; NONEON-NOSVE-NEXT: strb w18, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w0, [sp, #33] +; NONEON-NOSVE-NEXT: strb w17, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45] +; NONEON-NOSVE-NEXT: strb w9, [sp, #49] +; NONEON-NOSVE-NEXT: add w9, w7, w7 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: add w8, w12, w12 +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: add w8, w10, w10 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #79] +; NONEON-NOSVE-NEXT: add w9, w5, w5 +; NONEON-NOSVE-NEXT: strb w9, [sp, #77] +; NONEON-NOSVE-NEXT: add w9, w3, w3 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: strb w8, [sp, 
#64] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #91] +; NONEON-NOSVE-NEXT: strb w9, [sp, #75] +; NONEON-NOSVE-NEXT: add w9, w0, w0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #134] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #90] +; NONEON-NOSVE-NEXT: strb w9, [sp, #73] +; NONEON-NOSVE-NEXT: add w9, w15, w15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #132] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #89] +; NONEON-NOSVE-NEXT: strb w9, [sp, #71] +; NONEON-NOSVE-NEXT: add w9, w13, w13 +; NONEON-NOSVE-NEXT: strh w8, [sp, #130] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #88] +; NONEON-NOSVE-NEXT: strb w9, [sp, #69] +; NONEON-NOSVE-NEXT: add w9, w11, w11 +; NONEON-NOSVE-NEXT: strh w8, [sp, #128] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #95] +; NONEON-NOSVE-NEXT: strb w9, [sp, #67] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #142] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #94] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #448] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #140] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #93] +; NONEON-NOSVE-NEXT: strb w9, [sp, #65] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #432] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #138] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: strh w8, [sp, #136] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #416] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #118] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #82] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #128] +; NONEON-NOSVE-NEXT: strh w8, [sp, #116] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #400] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #114] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #80] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #192] +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #384] // 16-byte 
Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #112] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #368] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #126] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #86] +; NONEON-NOSVE-NEXT: strh w8, [sp, #124] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #85] +; NONEON-NOSVE-NEXT: strh w8, [sp, #122] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #84] +; NONEON-NOSVE-NEXT: strh w8, [sp, #120] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #107] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #112] +; NONEON-NOSVE-NEXT: strh w8, [sp, #166] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #106] +; NONEON-NOSVE-NEXT: strh w8, [sp, #164] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #105] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #176] +; NONEON-NOSVE-NEXT: strh w8, [sp, #162] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #104] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #182] +; NONEON-NOSVE-NEXT: strh w8, [sp, #160] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #111] +; NONEON-NOSVE-NEXT: strh w8, [sp, #174] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #110] +; NONEON-NOSVE-NEXT: strh w8, [sp, #172] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #109] +; NONEON-NOSVE-NEXT: strh w8, [sp, #170] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #108] +; NONEON-NOSVE-NEXT: strh w8, [sp, #168] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #99] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #160] +; NONEON-NOSVE-NEXT: strh w8, [sp, #150] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #98] +; NONEON-NOSVE-NEXT: strh w8, [sp, #148] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #97] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #224] +; NONEON-NOSVE-NEXT: strh w8, [sp, #146] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #96] +; NONEON-NOSVE-NEXT: strh w8, [sp, #144] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #103] +; NONEON-NOSVE-NEXT: strh w8, [sp, #158] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #102] +; NONEON-NOSVE-NEXT: strh w8, [sp, #156] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #101] +; NONEON-NOSVE-NEXT: strh w8, [sp, #154] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, 
#100] +; NONEON-NOSVE-NEXT: strh w8, [sp, #152] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #198] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #144] +; NONEON-NOSVE-NEXT: str w8, [sp, #284] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #196] +; NONEON-NOSVE-NEXT: str w8, [sp, #280] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #194] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #208] +; NONEON-NOSVE-NEXT: str w8, [sp, #276] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #192] +; NONEON-NOSVE-NEXT: str w8, [sp, #272] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #206] +; NONEON-NOSVE-NEXT: str w8, [sp, #300] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #204] +; NONEON-NOSVE-NEXT: str w8, [sp, #296] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #202] +; NONEON-NOSVE-NEXT: str w8, [sp, #292] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #200] +; NONEON-NOSVE-NEXT: str w8, [sp, #288] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #180] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #272] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #248] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #178] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #176] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #240] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #190] +; NONEON-NOSVE-NEXT: str w8, [sp, #268] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #188] +; NONEON-NOSVE-NEXT: str w8, [sp, #264] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #186] +; NONEON-NOSVE-NEXT: str w8, [sp, #260] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #184] +; NONEON-NOSVE-NEXT: str w8, [sp, #256] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #230] +; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #240] +; NONEON-NOSVE-NEXT: str w8, [sp, #348] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #228] +; NONEON-NOSVE-NEXT: str w8, [sp, #344] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #226] +; NONEON-NOSVE-NEXT: str w8, [sp, #340] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #224] +; NONEON-NOSVE-NEXT: str w8, [sp, #336] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #238] +; NONEON-NOSVE-NEXT: str w8, [sp, #364] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #236] +; NONEON-NOSVE-NEXT: str w8, [sp, #360] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, 
#234] +; NONEON-NOSVE-NEXT: str w8, [sp, #356] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #232] +; NONEON-NOSVE-NEXT: str w8, [sp, #352] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #214] +; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #336] +; NONEON-NOSVE-NEXT: str w8, [sp, #316] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #212] +; NONEON-NOSVE-NEXT: str w8, [sp, #312] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #210] +; NONEON-NOSVE-NEXT: str w8, [sp, #308] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #208] +; NONEON-NOSVE-NEXT: str w8, [sp, #304] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #222] +; NONEON-NOSVE-NEXT: str w8, [sp, #332] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #220] +; NONEON-NOSVE-NEXT: str w8, [sp, #328] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #218] +; NONEON-NOSVE-NEXT: str w8, [sp, #324] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #216] +; NONEON-NOSVE-NEXT: str w8, [sp, #320] +; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #304] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32] +; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64] +; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #464 +; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a %c = sext <32 x i8> %b to <32 x i32> @@ -194,6 +901,24 @@ define void @sext_v4i8_v4i64(<4 x i8> %a, ptr %out) { ; CHECK-NEXT: sxtb z0.d, p0/m, z0.d ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v4i8_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsb x8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrsb x9, [sp, #24] +; NONEON-NOSVE-NEXT: ldrsb x10, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsb x11, [sp, #20] +; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #48] +; NONEON-NOSVE-NEXT: stp x10, x11, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %b = sext <4 x i8> %a to <4 x i64> store <4 x i64>%b, ptr %out ret void @@ -216,6 +941,61 @@ define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) { ; CHECK-NEXT: stp q2, q1, [x0] ; CHECK-NEXT: stp q3, q0, [x0, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v8i8_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #176 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 176 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: add x8, sp, #144 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strh w9, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w9, [sp, #28] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strh w9, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strh w9, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w9, [sp, #20] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strh w9, [sp, #18] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #40] +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #64] +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #44] +; NONEON-NOSVE-NEXT: stp w9, w10, 
[sp, #72] +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #64] +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #36] +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #56] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #96] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #48] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #80] +; NONEON-NOSVE-NEXT: ldpsw x9, x10, [sp, #96] +; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #144] +; NONEON-NOSVE-NEXT: ldpsw x9, x10, [sp, #104] +; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #160] +; NONEON-NOSVE-NEXT: ldpsw x9, x10, [sp, #80] +; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #112] +; NONEON-NOSVE-NEXT: ldpsw x9, x10, [sp, #88] +; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #112] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x8] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: stp q2, q3, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #176 +; NONEON-NOSVE-NEXT: ret %b = sext <8 x i8> %a to <8 x i64> store <8 x i64>%b, ptr %out ret void @@ -253,6 +1033,113 @@ define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) { ; CHECK-NEXT: stp q1, q4, [x0, #32] ; CHECK-NEXT: stp q0, q2, [x0, #96] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v16i8_v16i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #368 +; NONEON-NOSVE-NEXT: str x29, [sp, #352] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 368 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldr x29, [sp, #352] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #24] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #35] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #33] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; 
NONEON-NOSVE-NEXT: ldrsb w8, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #39] +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #37] +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #56] +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26] +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #88] +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #98] +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #96] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #40] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #102] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #100] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #90] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #88] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #72] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #152] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #94] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #92] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #82] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #80] +; NONEON-NOSVE-NEXT: str d0, [sp, #360] +; NONEON-NOSVE-NEXT: ldp d2, d0, [sp, #136] +; 
NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #86] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #84] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #74] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #72] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #120] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #78] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #76] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] +; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #192] +; NONEON-NOSVE-NEXT: ldp d2, d0, [sp, #104] +; NONEON-NOSVE-NEXT: str d2, [sp, #168] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #176] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #216] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #320] +; NONEON-NOSVE-NEXT: ldrsw x9, [sp, #364] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #360] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #336] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #200] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #320] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #288] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #208] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #304] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #184] +; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #288] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #256] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #192] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #272] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #168] +; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #256] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #224] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #176] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #240] +; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #224] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q3, q4, [x0, #32] +; NONEON-NOSVE-NEXT: stp q6, q7, [x0, #64] +; NONEON-NOSVE-NEXT: stp q5, q2, [x0, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #368 +; NONEON-NOSVE-NEXT: ret %b = sext <16 x i8> %a to <16 x i64> store <16 x i64> %b, ptr %out ret void @@ -321,6 +1208,371 @@ define void 
@sext_v32i8_v32i64(ptr %in, ptr %out) { ; CHECK-NEXT: stp q0, q2, [x1, #224] ; CHECK-NEXT: stp q3, q1, [x1, #96] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v32i8_v32i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: sub sp, sp, #752 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 848 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 +; NONEON-NOSVE-NEXT: .cfi_offset w28, -80 +; NONEON-NOSVE-NEXT: .cfi_offset w30, -88 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -96 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w29, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w27, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w25, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w21, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w19, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w6, [sp, #38] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: ldrb w28, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22] +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; 
NONEON-NOSVE-NEXT: add w8, w29, w29 +; NONEON-NOSVE-NEXT: ldrb w4, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: add w8, w27, w27 +; NONEON-NOSVE-NEXT: ldrb w26, [sp, #31] +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: add w8, w25, w25 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: add w8, w23, w23 +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: add w8, w21, w21 +; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29] +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: add w8, w19, w19 +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23] +; NONEON-NOSVE-NEXT: strb w9, [sp, #59] +; NONEON-NOSVE-NEXT: add w9, w28, w28 +; NONEON-NOSVE-NEXT: add w18, w16, w16 +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: add w8, w6, w6 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w22, [sp, #27] +; NONEON-NOSVE-NEXT: strb w9, [sp, #57] +; NONEON-NOSVE-NEXT: add w9, w26, w26 +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: add w8, w4, w4 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w20, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21] +; NONEON-NOSVE-NEXT: strb w9, [sp, #55] +; NONEON-NOSVE-NEXT: add w9, w24, w24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: add w8, w2, w2 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44] +; NONEON-NOSVE-NEXT: add w17, w17, w17 +; NONEON-NOSVE-NEXT: strb w9, [sp, #53] +; NONEON-NOSVE-NEXT: add w9, w22, w22 +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: add w8, w16, w16 +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42] +; NONEON-NOSVE-NEXT: strb w17, [sp, #63] +; NONEON-NOSVE-NEXT: add w17, w30, w30 +; NONEON-NOSVE-NEXT: strb w9, [sp, #51] +; NONEON-NOSVE-NEXT: add w9, w20, w20 +; NONEON-NOSVE-NEXT: ldrb w7, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: add w8, w14, w14 +; NONEON-NOSVE-NEXT: ldrb 
w5, [sp, #37] +; NONEON-NOSVE-NEXT: strb w18, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w0, [sp, #33] +; NONEON-NOSVE-NEXT: strb w17, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45] +; NONEON-NOSVE-NEXT: strb w9, [sp, #49] +; NONEON-NOSVE-NEXT: add w9, w7, w7 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: add w8, w12, w12 +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: add w8, w10, w10 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #79] +; NONEON-NOSVE-NEXT: add w9, w5, w5 +; NONEON-NOSVE-NEXT: strb w9, [sp, #77] +; NONEON-NOSVE-NEXT: add w9, w3, w3 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #91] +; NONEON-NOSVE-NEXT: strb w9, [sp, #75] +; NONEON-NOSVE-NEXT: add w9, w0, w0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #134] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #90] +; NONEON-NOSVE-NEXT: strb w9, [sp, #73] +; NONEON-NOSVE-NEXT: add w9, w15, w15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #132] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #89] +; NONEON-NOSVE-NEXT: strb w9, [sp, #71] +; NONEON-NOSVE-NEXT: add w9, w13, w13 +; NONEON-NOSVE-NEXT: strh w8, [sp, #130] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #88] +; NONEON-NOSVE-NEXT: strb w9, [sp, #69] +; NONEON-NOSVE-NEXT: add w9, w11, w11 +; NONEON-NOSVE-NEXT: strh w8, [sp, #128] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #95] +; NONEON-NOSVE-NEXT: strb w9, [sp, #67] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #142] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #94] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: strh w8, [sp, #140] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #93] +; NONEON-NOSVE-NEXT: 
strb w9, [sp, #65] +; NONEON-NOSVE-NEXT: strh w8, [sp, #138] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: strh w8, [sp, #136] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #83] +; NONEON-NOSVE-NEXT: strh w8, [sp, #118] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #82] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #128] +; NONEON-NOSVE-NEXT: strh w8, [sp, #116] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #81] +; NONEON-NOSVE-NEXT: strh w8, [sp, #114] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #80] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #192] +; NONEON-NOSVE-NEXT: strh w8, [sp, #112] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #87] +; NONEON-NOSVE-NEXT: strh w8, [sp, #126] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #86] +; NONEON-NOSVE-NEXT: strh w8, [sp, #124] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #85] +; NONEON-NOSVE-NEXT: strh w8, [sp, #122] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #84] +; NONEON-NOSVE-NEXT: strh w8, [sp, #120] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #107] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #112] +; NONEON-NOSVE-NEXT: strh w8, [sp, #166] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #106] +; NONEON-NOSVE-NEXT: strh w8, [sp, #164] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #105] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #176] +; NONEON-NOSVE-NEXT: strh w8, [sp, #162] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #104] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #178] +; NONEON-NOSVE-NEXT: strh w8, [sp, #160] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #111] +; NONEON-NOSVE-NEXT: strh w8, [sp, #174] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #110] +; NONEON-NOSVE-NEXT: strh w8, [sp, #172] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #109] +; NONEON-NOSVE-NEXT: strh w8, [sp, #170] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #108] +; NONEON-NOSVE-NEXT: strh w8, [sp, #168] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #99] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #160] +; NONEON-NOSVE-NEXT: strh w8, [sp, #150] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #98] +; 
NONEON-NOSVE-NEXT: strh w8, [sp, #148] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #97] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #224] +; NONEON-NOSVE-NEXT: strh w8, [sp, #146] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #96] +; NONEON-NOSVE-NEXT: strh w8, [sp, #144] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #103] +; NONEON-NOSVE-NEXT: strh w8, [sp, #158] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #102] +; NONEON-NOSVE-NEXT: strh w8, [sp, #156] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #101] +; NONEON-NOSVE-NEXT: strh w8, [sp, #154] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #100] +; NONEON-NOSVE-NEXT: strh w8, [sp, #152] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #194] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #144] +; NONEON-NOSVE-NEXT: str w8, [sp, #276] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #192] +; NONEON-NOSVE-NEXT: str w8, [sp, #272] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #198] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #208] +; NONEON-NOSVE-NEXT: str w8, [sp, #284] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #196] +; NONEON-NOSVE-NEXT: str w8, [sp, #280] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #202] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #272] +; NONEON-NOSVE-NEXT: str w8, [sp, #292] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #200] +; NONEON-NOSVE-NEXT: str w8, [sp, #288] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #206] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #400] +; NONEON-NOSVE-NEXT: str w8, [sp, #300] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #204] +; NONEON-NOSVE-NEXT: str w8, [sp, #296] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #176] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #288] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #240] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #182] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #180] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #248] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #186] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #416] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #240] +; NONEON-NOSVE-NEXT: str w8, [sp, #260] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #184] +; NONEON-NOSVE-NEXT: str w8, [sp, #256] +; NONEON-NOSVE-NEXT: ldrsh w8, 
[sp, #190] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #368] +; NONEON-NOSVE-NEXT: str w8, [sp, #268] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #188] +; NONEON-NOSVE-NEXT: ldrsw x9, [sp, #372] +; NONEON-NOSVE-NEXT: str w8, [sp, #264] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #226] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #256] +; NONEON-NOSVE-NEXT: str w8, [sp, #340] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #224] +; NONEON-NOSVE-NEXT: str w8, [sp, #336] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #230] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #384] +; NONEON-NOSVE-NEXT: str w8, [sp, #348] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #228] +; NONEON-NOSVE-NEXT: str w8, [sp, #344] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #234] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #336] +; NONEON-NOSVE-NEXT: str w8, [sp, #356] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #232] +; NONEON-NOSVE-NEXT: str w8, [sp, #352] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #238] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #464] +; NONEON-NOSVE-NEXT: str w8, [sp, #364] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #236] +; NONEON-NOSVE-NEXT: str w8, [sp, #360] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #210] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #352] +; NONEON-NOSVE-NEXT: str w8, [sp, #308] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #208] +; NONEON-NOSVE-NEXT: str w8, [sp, #304] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #214] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #480] +; NONEON-NOSVE-NEXT: str w8, [sp, #316] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #212] +; NONEON-NOSVE-NEXT: str w8, [sp, #312] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #218] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #304] +; NONEON-NOSVE-NEXT: str w8, [sp, #324] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #216] +; NONEON-NOSVE-NEXT: str w8, [sp, #320] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #222] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #432] +; NONEON-NOSVE-NEXT: str w8, [sp, #332] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #220] +; NONEON-NOSVE-NEXT: str w8, [sp, #328] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #404] +; NONEON-NOSVE-NEXT: ldp 
d1, d0, [sp, #320] +; NONEON-NOSVE-NEXT: str x8, [sp, #568] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #400] +; NONEON-NOSVE-NEXT: str x8, [sp, #560] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #412] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #448] +; NONEON-NOSVE-NEXT: str x8, [sp, #584] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #408] +; NONEON-NOSVE-NEXT: str x8, [sp, #576] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #420] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #560] +; NONEON-NOSVE-NEXT: str x8, [sp, #600] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #416] +; NONEON-NOSVE-NEXT: str x8, [sp, #592] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #428] +; NONEON-NOSVE-NEXT: str x8, [sp, #616] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #424] +; NONEON-NOSVE-NEXT: str x8, [sp, #608] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #368] +; NONEON-NOSVE-NEXT: ldp q2, q3, [sp, #592] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #496] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #380] +; NONEON-NOSVE-NEXT: str x8, [sp, #520] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #376] +; NONEON-NOSVE-NEXT: str x8, [sp, #512] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #388] +; NONEON-NOSVE-NEXT: ldp q4, q5, [sp, #496] +; NONEON-NOSVE-NEXT: str x8, [sp, #536] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #384] +; NONEON-NOSVE-NEXT: str x8, [sp, #528] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #396] +; NONEON-NOSVE-NEXT: str x8, [sp, #552] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #392] +; NONEON-NOSVE-NEXT: str x8, [sp, #544] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #468] +; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #528] +; NONEON-NOSVE-NEXT: str x8, [sp, #696] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #464] +; NONEON-NOSVE-NEXT: str x8, [sp, #688] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #476] +; NONEON-NOSVE-NEXT: str x8, [sp, #712] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #472] +; NONEON-NOSVE-NEXT: str x8, [sp, #704] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #484] +; NONEON-NOSVE-NEXT: ldp q16, q17, [sp, #688] +; NONEON-NOSVE-NEXT: str x8, [sp, #728] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #480] +; NONEON-NOSVE-NEXT: 
str x8, [sp, #720] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #492] +; NONEON-NOSVE-NEXT: str x8, [sp, #744] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #488] +; NONEON-NOSVE-NEXT: str x8, [sp, #736] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #436] +; NONEON-NOSVE-NEXT: ldp q19, q20, [sp, #720] +; NONEON-NOSVE-NEXT: str x8, [sp, #632] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #432] +; NONEON-NOSVE-NEXT: str x8, [sp, #624] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #444] +; NONEON-NOSVE-NEXT: str x8, [sp, #648] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #440] +; NONEON-NOSVE-NEXT: str x8, [sp, #640] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #452] +; NONEON-NOSVE-NEXT: ldp q22, q23, [sp, #624] +; NONEON-NOSVE-NEXT: str x8, [sp, #664] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #448] +; NONEON-NOSVE-NEXT: str x8, [sp, #656] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #460] +; NONEON-NOSVE-NEXT: str x8, [sp, #680] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #456] +; NONEON-NOSVE-NEXT: str x8, [sp, #672] +; NONEON-NOSVE-NEXT: ldp q21, q18, [sp, #656] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1, #32] +; NONEON-NOSVE-NEXT: stp q4, q5, [x1, #64] +; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #96] +; NONEON-NOSVE-NEXT: stp q16, q17, [x1, #128] +; NONEON-NOSVE-NEXT: stp q19, q20, [x1, #160] +; NONEON-NOSVE-NEXT: stp q22, q23, [x1, #192] +; NONEON-NOSVE-NEXT: stp q21, q18, [x1, #224] +; NONEON-NOSVE-NEXT: add sp, sp, #752 +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a %c = sext <32 x i8> %b to <32 x i64> @@ -341,6 +1593,29 @@ define void @sext_v8i16_v8i32(<8 x 
i16> %a, ptr %out) { ; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v8i16_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #18] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %b = sext <8 x i16> %a to <8 x i32> store <8 x i32>%b, ptr %out ret void @@ -361,6 +1636,95 @@ define void @sext_v16i16_v16i32(ptr %in, ptr %out) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v16i16_v16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #160 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w3, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w5, [sp] +; NONEON-NOSVE-NEXT: ldrh w2, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w4, [sp, #14] +; NONEON-NOSVE-NEXT: add w13, w13, w13 +; NONEON-NOSVE-NEXT: add w14, w14, w14 +; NONEON-NOSVE-NEXT: ldrh w18, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w0, [sp, #10] +; NONEON-NOSVE-NEXT: strh w14, [sp, #46] +; NONEON-NOSVE-NEXT: add w14, w3, w3 +; NONEON-NOSVE-NEXT: strh w13, [sp, #44] +; 
NONEON-NOSVE-NEXT: add w13, w5, w5 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: strh w14, [sp, #42] +; NONEON-NOSVE-NEXT: add w14, w4, w4 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26] +; NONEON-NOSVE-NEXT: strh w13, [sp, #40] +; NONEON-NOSVE-NEXT: add w13, w2, w2 +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #22] +; NONEON-NOSVE-NEXT: strh w14, [sp, #38] +; NONEON-NOSVE-NEXT: add w14, w0, w0 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: strh w13, [sp, #36] +; NONEON-NOSVE-NEXT: add w13, w18, w18 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w14, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #30] +; NONEON-NOSVE-NEXT: strh w13, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #18] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #20] +; NONEON-NOSVE-NEXT: strh w9, [sp, #50] +; NONEON-NOSVE-NEXT: add w14, w17, w17 +; NONEON-NOSVE-NEXT: add w12, w12, w12 +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: add w13, w16, w16 +; NONEON-NOSVE-NEXT: add w11, w11, w11 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: add w10, w10, w10 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #78] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #76] +; NONEON-NOSVE-NEXT: strh w14, [sp, #62] +; NONEON-NOSVE-NEXT: add w14, w15, w15 +; NONEON-NOSVE-NEXT: strh w13, [sp, #60] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #74] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #72] +; NONEON-NOSVE-NEXT: strh w14, [sp, #58] +; NONEON-NOSVE-NEXT: strh w12, [sp, #56] +; NONEON-NOSVE-NEXT: strh w11, [sp, #54] +; NONEON-NOSVE-NEXT: strh w10, [sp, #52] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #70] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #68] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #66] +; 
NONEON-NOSVE-NEXT: ldrsh w8, [sp, #64] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #94] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #96] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #90] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #88] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #86] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #84] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #82] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #80] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: ret %a = load <16 x i16>, ptr %in %b = add <16 x i16> %a, %a %c = sext <16 x i16> %b to <16 x i32> @@ -382,6 +1746,28 @@ define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) { ; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v4i16_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #40] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #64] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret 
%b = sext <4 x i16> %a to <4 x i64> store <4 x i64>%b, ptr %out ret void @@ -403,6 +1789,43 @@ define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) { ; CHECK-NEXT: stp q2, q1, [x0] ; CHECK-NEXT: stp q3, q0, [x0, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v8i16_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-160]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #18] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #88] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #144] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #80] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #128] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #72] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #112] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #64] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q2, q3, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: ret %b = sext <8 x i16> %a to <8 x i64> store <8 x i64>%b, ptr %out ret void @@ -437,6 +1860,128 @@ define void @sext_v16i16_v16i64(ptr %in, ptr %out) { ; CHECK-NEXT: stp q6, q0, [x1, #96] ; CHECK-NEXT: stp q7, q1, [x1, #32] ; 
CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v16i16_v16i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #368 +; NONEON-NOSVE-NEXT: str x29, [sp, #352] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 368 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldr x29, [sp, #352] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: stp q1, q0, [sp] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w3, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w5, [sp] +; NONEON-NOSVE-NEXT: ldrh w2, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w4, [sp, #14] +; NONEON-NOSVE-NEXT: add w13, w13, w13 +; NONEON-NOSVE-NEXT: add w14, w14, w14 +; NONEON-NOSVE-NEXT: ldrh w18, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w0, [sp, #10] +; NONEON-NOSVE-NEXT: strh w14, [sp, #54] +; NONEON-NOSVE-NEXT: add w14, w3, w3 +; NONEON-NOSVE-NEXT: strh w13, [sp, #52] +; NONEON-NOSVE-NEXT: add w13, w5, w5 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: strh w14, [sp, #50] +; NONEON-NOSVE-NEXT: add w14, w4, w4 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26] +; NONEON-NOSVE-NEXT: strh w13, [sp, #48] +; NONEON-NOSVE-NEXT: add w13, w2, w2 +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #22] +; NONEON-NOSVE-NEXT: strh w14, [sp, #46] +; NONEON-NOSVE-NEXT: add w14, w0, w0 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: strh w13, [sp, #44] +; NONEON-NOSVE-NEXT: add w13, w18, w18 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w14, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #30] +; NONEON-NOSVE-NEXT: strh w13, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #18] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #20] +; NONEON-NOSVE-NEXT: strh w9, [sp, #58] +; NONEON-NOSVE-NEXT: add w14, w17, w17 +; NONEON-NOSVE-NEXT: add w12, w12, w12 +; NONEON-NOSVE-NEXT: strh w8, 
[sp, #56] +; NONEON-NOSVE-NEXT: add w13, w16, w16 +; NONEON-NOSVE-NEXT: add w11, w11, w11 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #72] +; NONEON-NOSVE-NEXT: add w10, w10, w10 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #82] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #80] +; NONEON-NOSVE-NEXT: strh w14, [sp, #70] +; NONEON-NOSVE-NEXT: add w14, w15, w15 +; NONEON-NOSVE-NEXT: strh w13, [sp, #68] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #86] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #84] +; NONEON-NOSVE-NEXT: strh w14, [sp, #66] +; NONEON-NOSVE-NEXT: strh w12, [sp, #64] +; NONEON-NOSVE-NEXT: strh w11, [sp, #62] +; NONEON-NOSVE-NEXT: strh w10, [sp, #60] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #56] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #74] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #72] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #78] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #76] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #88] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #120] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #98] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #96] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #102] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #100] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #184] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #104] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #90] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #88] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #94] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #92] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #168] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #152] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] +; NONEON-NOSVE-NEXT: str d0, [sp, #360] +; NONEON-NOSVE-NEXT: ldp d2, d0, [sp, #136] +; NONEON-NOSVE-NEXT: str d2, [sp, #200] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] +; 
NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #184] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #256] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #192] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #272] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #168] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #256] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #224] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #176] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #240] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #216] +; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #224] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #320] +; NONEON-NOSVE-NEXT: ldrsw x9, [sp, #364] +; NONEON-NOSVE-NEXT: ldrsw x8, [sp, #360] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #336] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #200] +; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #320] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #288] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #208] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #304] +; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #288] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32] +; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64] +; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #368 +; NONEON-NOSVE-NEXT: ret %a = load <16 x i16>, ptr %in %b = add <16 x i16> %a, %a %c = sext <16 x i16> %b to <16 x i64> @@ -457,6 +2002,21 @@ define void @sext_v4i32_v4i64(<4 x i32> %a, ptr %out) { ; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v4i32_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #24] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %b = sext <4 x i32> %a to <4 x i64> store <4 x i64>%b, ptr %out ret void @@ -477,6 +2037,47 @@ define void @sext_v8i32_v8i64(ptr %in, ptr %out) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v8i32_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #160 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] +; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #16] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: ldp w10, w11, [sp, #24] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: add w9, w13, w13 +; NONEON-NOSVE-NEXT: add w8, w12, w12 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: add w9, w15, w15 +; NONEON-NOSVE-NEXT: add w8, w14, w14 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: add w9, w11, w11 +; NONEON-NOSVE-NEXT: add w8, w10, w10 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #72] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #112] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #64] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #96] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #88] +; 
NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #96] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #144] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #80] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: ret %a = load <8 x i32>, ptr %in %b = add <8 x i32> %a, %a %c = sext <8 x i32> %b to <8 x i64> @@ -497,6 +2098,49 @@ define void @zext_v16i8_v16i16(<16 x i8> %a, ptr %out) { ; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v16i8_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: strh 
w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %b = zext <16 x i8> %a to <16 x i16> store <16 x i16>%b, ptr %out ret void @@ -518,6 +2162,210 @@ define void @zext_v32i8_v32i16(ptr %in, ptr %out) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v32i8_v32i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #272 +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #176] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #192] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #208] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #224] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #240] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #256] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 272 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 +; NONEON-NOSVE-NEXT: .cfi_offset w28, -80 +; NONEON-NOSVE-NEXT: .cfi_offset w30, -88 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -96 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w29, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w27, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w25, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28] +; 
NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w21, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w19, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w6, [sp, #38] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: ldrb w28, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22] +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: add w8, w29, w29 +; NONEON-NOSVE-NEXT: ldrb w4, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: add w8, w27, w27 +; NONEON-NOSVE-NEXT: ldrb w26, [sp, #31] +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: add w8, w25, w25 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: add w8, w23, w23 +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: add w8, w21, w21 +; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29] +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: add w8, w19, w19 +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23] +; NONEON-NOSVE-NEXT: strb w9, [sp, #59] +; NONEON-NOSVE-NEXT: add w9, w28, w28 +; NONEON-NOSVE-NEXT: add w18, w16, w16 +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: add w8, w6, w6 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w22, [sp, #27] +; NONEON-NOSVE-NEXT: strb w9, [sp, #57] +; NONEON-NOSVE-NEXT: add w9, w26, w26 +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: add w8, w4, w4 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w20, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21] +; NONEON-NOSVE-NEXT: strb w9, [sp, #55] +; NONEON-NOSVE-NEXT: add w9, w24, w24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: add w8, w2, w2 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44] +; NONEON-NOSVE-NEXT: add w17, w17, w17 +; NONEON-NOSVE-NEXT: strb w9, [sp, #53] +; NONEON-NOSVE-NEXT: add w9, w22, 
w22 +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: add w8, w16, w16 +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42] +; NONEON-NOSVE-NEXT: strb w17, [sp, #63] +; NONEON-NOSVE-NEXT: add w17, w30, w30 +; NONEON-NOSVE-NEXT: strb w9, [sp, #51] +; NONEON-NOSVE-NEXT: add w9, w20, w20 +; NONEON-NOSVE-NEXT: ldrb w7, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: add w8, w14, w14 +; NONEON-NOSVE-NEXT: ldrb w5, [sp, #37] +; NONEON-NOSVE-NEXT: strb w18, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w0, [sp, #33] +; NONEON-NOSVE-NEXT: strb w17, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45] +; NONEON-NOSVE-NEXT: strb w9, [sp, #49] +; NONEON-NOSVE-NEXT: add w9, w7, w7 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: add w8, w12, w12 +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: add w8, w10, w10 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #79] +; NONEON-NOSVE-NEXT: add w9, w5, w5 +; NONEON-NOSVE-NEXT: strb w9, [sp, #77] +; NONEON-NOSVE-NEXT: add w9, w3, w3 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #95] +; NONEON-NOSVE-NEXT: strb w9, [sp, #75] +; NONEON-NOSVE-NEXT: add w9, w0, w0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #142] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #94] +; NONEON-NOSVE-NEXT: strb w9, [sp, #73] +; NONEON-NOSVE-NEXT: add w9, w15, w15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #140] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #93] +; NONEON-NOSVE-NEXT: strb w9, [sp, #71] +; NONEON-NOSVE-NEXT: add w9, w13, w13 +; NONEON-NOSVE-NEXT: strh w8, [sp, #138] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #92] +; NONEON-NOSVE-NEXT: strb w9, [sp, #69] +; NONEON-NOSVE-NEXT: 
add w9, w11, w11 +; NONEON-NOSVE-NEXT: strh w8, [sp, #136] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #91] +; NONEON-NOSVE-NEXT: strb w9, [sp, #67] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #134] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #90] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #256] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #132] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #89] +; NONEON-NOSVE-NEXT: strb w9, [sp, #65] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #240] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #130] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: strh w8, [sp, #128] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #224] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #126] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #86] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #208] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #124] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #192] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #122] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #176] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #120] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #83] +; NONEON-NOSVE-NEXT: strh w8, [sp, #118] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #82] +; NONEON-NOSVE-NEXT: strh w8, [sp, #116] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #81] +; NONEON-NOSVE-NEXT: strh w8, [sp, #114] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #80] +; NONEON-NOSVE-NEXT: strh w8, [sp, #112] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #111] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #112] +; NONEON-NOSVE-NEXT: strh w8, [sp, #174] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #110] +; NONEON-NOSVE-NEXT: strh w8, [sp, #172] +; NONEON-NOSVE-NEXT: ldrb 
w8, [sp, #109] +; NONEON-NOSVE-NEXT: strh w8, [sp, #170] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #108] +; NONEON-NOSVE-NEXT: strh w8, [sp, #168] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #107] +; NONEON-NOSVE-NEXT: strh w8, [sp, #166] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #106] +; NONEON-NOSVE-NEXT: strh w8, [sp, #164] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #105] +; NONEON-NOSVE-NEXT: strh w8, [sp, #162] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #104] +; NONEON-NOSVE-NEXT: strh w8, [sp, #160] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #103] +; NONEON-NOSVE-NEXT: strh w8, [sp, #158] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #102] +; NONEON-NOSVE-NEXT: strh w8, [sp, #156] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #101] +; NONEON-NOSVE-NEXT: strh w8, [sp, #154] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #100] +; NONEON-NOSVE-NEXT: strh w8, [sp, #152] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #99] +; NONEON-NOSVE-NEXT: strh w8, [sp, #150] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #98] +; NONEON-NOSVE-NEXT: strh w8, [sp, #148] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #97] +; NONEON-NOSVE-NEXT: strh w8, [sp, #146] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #96] +; NONEON-NOSVE-NEXT: strh w8, [sp, #144] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #144] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #272 +; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a %c = zext <32 x i8> %b to <32 x i16> @@ -539,6 +2387,46 @@ define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) { ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v8i8_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; 
NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: strh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: strh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #44] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #40] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #36] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %b = zext <8 x i8> %a to <8 x i32> store <8 x i32>%b, ptr %out ret void @@ -560,6 +2448,79 @@ define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) { ; CHECK-NEXT: stp q2, q1, [x0] ; CHECK-NEXT: stp q3, q0, [x0, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v16i8_v16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-160]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #94] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #88] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #84] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] 
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #80] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #72] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #68] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #64] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q2, q3, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: ret %b = zext <16 x i8> %a to <16 x i32> store <16 x i32> %b, ptr %out ret void @@ -594,6 +2555,284 @@ define void @zext_v32i8_v32i32(ptr %in, ptr %out) { ; CHECK-NEXT: stp q6, q0, [x1, #96] ; CHECK-NEXT: stp q7, q1, [x1, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v32i8_v32i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #464 +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #368] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #384] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #400] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #416] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #432] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #448] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 464 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; 
NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 +; NONEON-NOSVE-NEXT: .cfi_offset w28, -80 +; NONEON-NOSVE-NEXT: .cfi_offset w30, -88 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -96 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w29, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w27, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w25, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w21, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w19, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w6, [sp, #38] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: ldrb w28, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22] +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: add w8, w29, w29 +; NONEON-NOSVE-NEXT: ldrb w4, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: add w8, w27, w27 +; NONEON-NOSVE-NEXT: ldrb w26, [sp, #31] +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: add w8, w25, w25 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: add w8, w23, w23 +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: add w8, w21, w21 +; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29] +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: add w8, w19, w19 +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23] +; NONEON-NOSVE-NEXT: strb w9, [sp, #59] +; NONEON-NOSVE-NEXT: add w9, w28, w28 +; NONEON-NOSVE-NEXT: add w18, w16, w16 +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: add w8, w6, w6 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w22, [sp, #27] +; NONEON-NOSVE-NEXT: strb w9, [sp, #57] +; 
NONEON-NOSVE-NEXT: add w9, w26, w26 +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: add w8, w4, w4 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w20, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w30, [sp, #21] +; NONEON-NOSVE-NEXT: strb w9, [sp, #55] +; NONEON-NOSVE-NEXT: add w9, w24, w24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: add w8, w2, w2 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44] +; NONEON-NOSVE-NEXT: add w17, w17, w17 +; NONEON-NOSVE-NEXT: strb w9, [sp, #53] +; NONEON-NOSVE-NEXT: add w9, w22, w22 +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: add w8, w16, w16 +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42] +; NONEON-NOSVE-NEXT: strb w17, [sp, #63] +; NONEON-NOSVE-NEXT: add w17, w30, w30 +; NONEON-NOSVE-NEXT: strb w9, [sp, #51] +; NONEON-NOSVE-NEXT: add w9, w20, w20 +; NONEON-NOSVE-NEXT: ldrb w7, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: add w8, w14, w14 +; NONEON-NOSVE-NEXT: ldrb w5, [sp, #37] +; NONEON-NOSVE-NEXT: strb w18, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w0, [sp, #33] +; NONEON-NOSVE-NEXT: strb w17, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45] +; NONEON-NOSVE-NEXT: strb w9, [sp, #49] +; NONEON-NOSVE-NEXT: add w9, w7, w7 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: add w8, w12, w12 +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: add w8, w10, w10 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #79] +; NONEON-NOSVE-NEXT: add w9, w5, w5 +; NONEON-NOSVE-NEXT: strb w9, [sp, #77] +; NONEON-NOSVE-NEXT: add w9, w3, w3 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, 
#91] +; NONEON-NOSVE-NEXT: strb w9, [sp, #75] +; NONEON-NOSVE-NEXT: add w9, w0, w0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #134] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #90] +; NONEON-NOSVE-NEXT: strb w9, [sp, #73] +; NONEON-NOSVE-NEXT: add w9, w15, w15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #132] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #89] +; NONEON-NOSVE-NEXT: strb w9, [sp, #71] +; NONEON-NOSVE-NEXT: add w9, w13, w13 +; NONEON-NOSVE-NEXT: strh w8, [sp, #130] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #88] +; NONEON-NOSVE-NEXT: strb w9, [sp, #69] +; NONEON-NOSVE-NEXT: add w9, w11, w11 +; NONEON-NOSVE-NEXT: strh w8, [sp, #128] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #95] +; NONEON-NOSVE-NEXT: strb w9, [sp, #67] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #142] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #94] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #448] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #140] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #93] +; NONEON-NOSVE-NEXT: strb w9, [sp, #65] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #432] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #138] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: strh w8, [sp, #136] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #416] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #118] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #82] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #128] +; NONEON-NOSVE-NEXT: strh w8, [sp, #116] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #400] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #114] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #80] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #192] +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #384] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #112] 
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #368] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #126] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #86] +; NONEON-NOSVE-NEXT: strh w8, [sp, #124] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #85] +; NONEON-NOSVE-NEXT: strh w8, [sp, #122] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #84] +; NONEON-NOSVE-NEXT: strh w8, [sp, #120] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #107] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #112] +; NONEON-NOSVE-NEXT: strh w8, [sp, #166] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #106] +; NONEON-NOSVE-NEXT: strh w8, [sp, #164] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #105] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #176] +; NONEON-NOSVE-NEXT: strh w8, [sp, #162] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #104] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #182] +; NONEON-NOSVE-NEXT: strh w8, [sp, #160] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #111] +; NONEON-NOSVE-NEXT: strh w8, [sp, #174] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #110] +; NONEON-NOSVE-NEXT: strh w8, [sp, #172] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #109] +; NONEON-NOSVE-NEXT: strh w8, [sp, #170] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #108] +; NONEON-NOSVE-NEXT: strh w8, [sp, #168] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #99] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #160] +; NONEON-NOSVE-NEXT: strh w8, [sp, #150] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #98] +; NONEON-NOSVE-NEXT: strh w8, [sp, #148] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #97] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #224] +; NONEON-NOSVE-NEXT: strh w8, [sp, #146] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #96] +; NONEON-NOSVE-NEXT: strh w8, [sp, #144] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #103] +; NONEON-NOSVE-NEXT: strh w8, [sp, #158] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #102] +; NONEON-NOSVE-NEXT: strh w8, [sp, #156] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #101] +; NONEON-NOSVE-NEXT: strh w8, [sp, #154] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #100] +; NONEON-NOSVE-NEXT: strh w8, [sp, #152] +; NONEON-NOSVE-NEXT: ldrb w8, 
[sp, #198] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #144] +; NONEON-NOSVE-NEXT: str w8, [sp, #284] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #196] +; NONEON-NOSVE-NEXT: str w8, [sp, #280] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #194] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #208] +; NONEON-NOSVE-NEXT: str w8, [sp, #276] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #192] +; NONEON-NOSVE-NEXT: str w8, [sp, #272] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #206] +; NONEON-NOSVE-NEXT: str w8, [sp, #300] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #204] +; NONEON-NOSVE-NEXT: str w8, [sp, #296] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #202] +; NONEON-NOSVE-NEXT: str w8, [sp, #292] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #200] +; NONEON-NOSVE-NEXT: str w8, [sp, #288] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #180] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #272] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #248] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #178] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #176] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #240] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #190] +; NONEON-NOSVE-NEXT: str w8, [sp, #268] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #188] +; NONEON-NOSVE-NEXT: str w8, [sp, #264] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #186] +; NONEON-NOSVE-NEXT: str w8, [sp, #260] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #184] +; NONEON-NOSVE-NEXT: str w8, [sp, #256] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #230] +; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #240] +; NONEON-NOSVE-NEXT: str w8, [sp, #348] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #228] +; NONEON-NOSVE-NEXT: str w8, [sp, #344] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #226] +; NONEON-NOSVE-NEXT: str w8, [sp, #340] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #224] +; NONEON-NOSVE-NEXT: str w8, [sp, #336] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #238] +; NONEON-NOSVE-NEXT: str w8, [sp, #364] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #236] +; NONEON-NOSVE-NEXT: str w8, [sp, #360] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #234] +; NONEON-NOSVE-NEXT: str w8, [sp, #356] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #232] +; 
NONEON-NOSVE-NEXT: str w8, [sp, #352] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #214] +; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #336] +; NONEON-NOSVE-NEXT: str w8, [sp, #316] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #212] +; NONEON-NOSVE-NEXT: str w8, [sp, #312] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #210] +; NONEON-NOSVE-NEXT: str w8, [sp, #308] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #208] +; NONEON-NOSVE-NEXT: str w8, [sp, #304] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #222] +; NONEON-NOSVE-NEXT: str w8, [sp, #332] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #220] +; NONEON-NOSVE-NEXT: str w8, [sp, #328] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #218] +; NONEON-NOSVE-NEXT: str w8, [sp, #324] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #216] +; NONEON-NOSVE-NEXT: str w8, [sp, #320] +; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #304] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32] +; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64] +; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #464 +; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a %c = zext <32 x i8> %b to <32 x i32> @@ -619,6 +2858,30 @@ define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) { ; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v4i8_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #72] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32] +; 
NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #56] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %b = zext <4 x i8> %a to <4 x i64> store <4 x i64>%b, ptr %out ret void @@ -641,6 +2904,65 @@ define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) { ; CHECK-NEXT: stp q2, q1, [x0] ; CHECK-NEXT: stp q3, q0, [x0, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v8i8_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #176 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 176 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: add x8, sp, #144 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strh w9, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w9, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strh w9, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strh w9, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w9, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strh w9, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #64] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #64] +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #96] +; 
NONEON-NOSVE-NEXT: stp w9, w10, [sp, #56] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #96] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #48] +; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #152] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #144] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #104] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #80] +; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #168] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #160] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #80] +; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #120] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #112] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #88] +; NONEON-NOSVE-NEXT: stp w10, wzr, [sp, #136] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #112] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x8] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: stp q2, q3, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #176 +; NONEON-NOSVE-NEXT: ret %b = zext <8 x i8> %a to <8 x i64> store <8 x i64>%b, ptr %out ret void @@ -678,6 +3000,133 @@ define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) { ; CHECK-NEXT: stp q1, q4, [x0, #32] ; CHECK-NEXT: stp q0, q2, [x0, #96] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v16i8_v16i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #368 +; NONEON-NOSVE-NEXT: str x29, [sp, #352] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 368 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldr x29, [sp, #352] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: str wzr, [sp, #332] +; NONEON-NOSVE-NEXT: str wzr, [sp, #324] +; NONEON-NOSVE-NEXT: str wzr, [sp, #348] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #35] +; NONEON-NOSVE-NEXT: str wzr, [sp, #340] +; NONEON-NOSVE-NEXT: str wzr, [sp, #300] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #34] +; NONEON-NOSVE-NEXT: str wzr, [sp, #292] +; 
NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #33] +; NONEON-NOSVE-NEXT: str wzr, [sp, #316] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #32] +; NONEON-NOSVE-NEXT: str wzr, [sp, #308] +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #39] +; NONEON-NOSVE-NEXT: str wzr, [sp, #268] +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #38] +; NONEON-NOSVE-NEXT: str wzr, [sp, #260] +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #37] +; NONEON-NOSVE-NEXT: str wzr, [sp, #284] +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #36] +; NONEON-NOSVE-NEXT: str wzr, [sp, #276] +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #56] +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #88] +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #98] +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #96] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #40] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #102] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #100] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #88] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #72] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #152] +; 
NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #92] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #80] +; NONEON-NOSVE-NEXT: str d0, [sp, #360] +; NONEON-NOSVE-NEXT: ldp d2, d0, [sp, #136] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #84] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #72] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #120] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #76] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #216] +; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #192] +; NONEON-NOSVE-NEXT: ldp d2, d0, [sp, #104] +; NONEON-NOSVE-NEXT: str w8, [sp, #320] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #364] +; NONEON-NOSVE-NEXT: str w9, [sp, #328] +; NONEON-NOSVE-NEXT: str w8, [sp, #344] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #360] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #176] +; NONEON-NOSVE-NEXT: str w8, [sp, #336] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #200] +; NONEON-NOSVE-NEXT: str d2, [sp, #168] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #320] +; NONEON-NOSVE-NEXT: str w9, [sp, #296] +; NONEON-NOSVE-NEXT: str w8, [sp, #288] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #208] +; NONEON-NOSVE-NEXT: str w9, [sp, #312] +; NONEON-NOSVE-NEXT: str w8, [sp, #304] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #184] +; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #288] +; NONEON-NOSVE-NEXT: str w9, [sp, #264] +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #252] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #192] +; NONEON-NOSVE-NEXT: str w9, [sp, #280] +; NONEON-NOSVE-NEXT: str w8, [sp, #272] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #168] +; NONEON-NOSVE-NEXT: ldp q6, 
q7, [sp, #256] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #232] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #224] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #176] +; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #244] +; NONEON-NOSVE-NEXT: str w8, [sp, #240] +; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #224] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q3, q4, [x0, #32] +; NONEON-NOSVE-NEXT: stp q6, q7, [x0, #64] +; NONEON-NOSVE-NEXT: stp q5, q2, [x0, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #368 +; NONEON-NOSVE-NEXT: ret %b = zext <16 x i8> %a to <16 x i64> store <16 x i64> %b, ptr %out ret void @@ -746,6 +3195,404 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) { ; CHECK-NEXT: stp q0, q2, [x1, #224] ; CHECK-NEXT: stp q3, q1, [x1, #96] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v32i8_v32i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: sub sp, sp, #752 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 848 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 +; NONEON-NOSVE-NEXT: .cfi_offset w28, -80 +; NONEON-NOSVE-NEXT: .cfi_offset w30, -88 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -96 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: str wzr, [sp, #572] +; NONEON-NOSVE-NEXT: str wzr, [sp, #564] +; 
NONEON-NOSVE-NEXT: str wzr, [sp, #588] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w29, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w27, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w25, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w23, [sp, #28] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w21, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w19, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w6, [sp, #38] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: ldrb w28, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #22] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: add w8, w29, w29 +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: add w8, w27, w27 +; NONEON-NOSVE-NEXT: add w18, w16, w16 +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: add w8, w25, w25 +; NONEON-NOSVE-NEXT: ldrb w4, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: add w8, w23, w23 +; NONEON-NOSVE-NEXT: ldrb w26, [sp, #31] +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: add w8, w21, w21 +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: add w8, w19, w19 +; NONEON-NOSVE-NEXT: ldrb w24, [sp, #29] +; NONEON-NOSVE-NEXT: strb w9, [sp, #59] +; NONEON-NOSVE-NEXT: add w9, w28, w28 +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #23] +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: add w8, w6, w6 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w22, [sp, #27] +; NONEON-NOSVE-NEXT: strb w9, [sp, #57] +; NONEON-NOSVE-NEXT: add w9, w26, w26 +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: add w8, w4, w4 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w20, [sp, #25] +; NONEON-NOSVE-NEXT: 
ldrb w30, [sp, #21] +; NONEON-NOSVE-NEXT: add w17, w17, w17 +; NONEON-NOSVE-NEXT: strb w9, [sp, #55] +; NONEON-NOSVE-NEXT: add w9, w24, w24 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: add w8, w2, w2 +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42] +; NONEON-NOSVE-NEXT: strb w9, [sp, #53] +; NONEON-NOSVE-NEXT: add w9, w22, w22 +; NONEON-NOSVE-NEXT: ldrb w7, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: add w8, w16, w16 +; NONEON-NOSVE-NEXT: ldrb w5, [sp, #37] +; NONEON-NOSVE-NEXT: strb w17, [sp, #63] +; NONEON-NOSVE-NEXT: add w17, w30, w30 +; NONEON-NOSVE-NEXT: ldrb w3, [sp, #35] +; NONEON-NOSVE-NEXT: strb w9, [sp, #51] +; NONEON-NOSVE-NEXT: add w9, w20, w20 +; NONEON-NOSVE-NEXT: ldrb w0, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: add w8, w14, w14 +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #47] +; NONEON-NOSVE-NEXT: strb w18, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43] +; NONEON-NOSVE-NEXT: strb w17, [sp, #61] +; NONEON-NOSVE-NEXT: strb w9, [sp, #49] +; NONEON-NOSVE-NEXT: add w9, w7, w7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: add w8, w12, w12 +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: add w8, w10, w10 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #79] +; NONEON-NOSVE-NEXT: add w9, w5, w5 +; NONEON-NOSVE-NEXT: strb w9, [sp, #77] +; NONEON-NOSVE-NEXT: add w9, w3, w3 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #91] +; NONEON-NOSVE-NEXT: strb w9, [sp, #75] +; NONEON-NOSVE-NEXT: add w9, w0, w0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #134] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #90] +; NONEON-NOSVE-NEXT: strb w9, [sp, #73] +; 
NONEON-NOSVE-NEXT: add w9, w15, w15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #132] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #89] +; NONEON-NOSVE-NEXT: strb w9, [sp, #71] +; NONEON-NOSVE-NEXT: add w9, w13, w13 +; NONEON-NOSVE-NEXT: strh w8, [sp, #130] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #88] +; NONEON-NOSVE-NEXT: strb w9, [sp, #69] +; NONEON-NOSVE-NEXT: add w9, w11, w11 +; NONEON-NOSVE-NEXT: strh w8, [sp, #128] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #95] +; NONEON-NOSVE-NEXT: strb w9, [sp, #67] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #142] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #94] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: str wzr, [sp, #580] +; NONEON-NOSVE-NEXT: strh w8, [sp, #140] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #93] +; NONEON-NOSVE-NEXT: strb w9, [sp, #65] +; NONEON-NOSVE-NEXT: strh w8, [sp, #138] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: str wzr, [sp, #604] +; NONEON-NOSVE-NEXT: strh w8, [sp, #136] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #83] +; NONEON-NOSVE-NEXT: str wzr, [sp, #596] +; NONEON-NOSVE-NEXT: strh w8, [sp, #118] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #82] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #128] +; NONEON-NOSVE-NEXT: strh w8, [sp, #116] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #81] +; NONEON-NOSVE-NEXT: str wzr, [sp, #620] +; NONEON-NOSVE-NEXT: strh w8, [sp, #114] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #80] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #192] +; NONEON-NOSVE-NEXT: strh w8, [sp, #112] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #87] +; NONEON-NOSVE-NEXT: str wzr, [sp, #612] +; NONEON-NOSVE-NEXT: strh w8, [sp, #126] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #86] +; NONEON-NOSVE-NEXT: str wzr, [sp, #508] +; NONEON-NOSVE-NEXT: strh w8, [sp, #124] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #85] +; NONEON-NOSVE-NEXT: str wzr, [sp, #500] +; NONEON-NOSVE-NEXT: strh w8, [sp, #122] +; NONEON-NOSVE-NEXT: ldrb 
w8, [sp, #84] +; NONEON-NOSVE-NEXT: str wzr, [sp, #524] +; NONEON-NOSVE-NEXT: strh w8, [sp, #120] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #107] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #112] +; NONEON-NOSVE-NEXT: str wzr, [sp, #516] +; NONEON-NOSVE-NEXT: strh w8, [sp, #166] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #106] +; NONEON-NOSVE-NEXT: str wzr, [sp, #540] +; NONEON-NOSVE-NEXT: strh w8, [sp, #164] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #105] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #176] +; NONEON-NOSVE-NEXT: strh w8, [sp, #162] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #104] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #178] +; NONEON-NOSVE-NEXT: str wzr, [sp, #532] +; NONEON-NOSVE-NEXT: strh w8, [sp, #160] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #111] +; NONEON-NOSVE-NEXT: str wzr, [sp, #556] +; NONEON-NOSVE-NEXT: strh w8, [sp, #174] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #110] +; NONEON-NOSVE-NEXT: str wzr, [sp, #548] +; NONEON-NOSVE-NEXT: strh w8, [sp, #172] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #109] +; NONEON-NOSVE-NEXT: str wzr, [sp, #700] +; NONEON-NOSVE-NEXT: strh w8, [sp, #170] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #108] +; NONEON-NOSVE-NEXT: str wzr, [sp, #692] +; NONEON-NOSVE-NEXT: strh w8, [sp, #168] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #99] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #160] +; NONEON-NOSVE-NEXT: str wzr, [sp, #716] +; NONEON-NOSVE-NEXT: strh w8, [sp, #150] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #98] +; NONEON-NOSVE-NEXT: str wzr, [sp, #708] +; NONEON-NOSVE-NEXT: strh w8, [sp, #148] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #97] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #224] +; NONEON-NOSVE-NEXT: strh w8, [sp, #146] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #96] +; NONEON-NOSVE-NEXT: str wzr, [sp, #732] +; NONEON-NOSVE-NEXT: strh w8, [sp, #144] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #103] +; NONEON-NOSVE-NEXT: str wzr, [sp, #724] +; NONEON-NOSVE-NEXT: strh w8, [sp, #158] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #102] +; NONEON-NOSVE-NEXT: str wzr, [sp, #748] +; NONEON-NOSVE-NEXT: strh w8, [sp, #156] +; 
NONEON-NOSVE-NEXT: ldrb w8, [sp, #101] +; NONEON-NOSVE-NEXT: str wzr, [sp, #740] +; NONEON-NOSVE-NEXT: strh w8, [sp, #154] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #100] +; NONEON-NOSVE-NEXT: str wzr, [sp, #636] +; NONEON-NOSVE-NEXT: strh w8, [sp, #152] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #194] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #144] +; NONEON-NOSVE-NEXT: str wzr, [sp, #628] +; NONEON-NOSVE-NEXT: str w8, [sp, #276] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #192] +; NONEON-NOSVE-NEXT: str wzr, [sp, #652] +; NONEON-NOSVE-NEXT: str w8, [sp, #272] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #198] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #208] +; NONEON-NOSVE-NEXT: str w8, [sp, #284] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #196] +; NONEON-NOSVE-NEXT: str wzr, [sp, #644] +; NONEON-NOSVE-NEXT: str w8, [sp, #280] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #202] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #272] +; NONEON-NOSVE-NEXT: str wzr, [sp, #668] +; NONEON-NOSVE-NEXT: str w8, [sp, #292] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #200] +; NONEON-NOSVE-NEXT: str wzr, [sp, #660] +; NONEON-NOSVE-NEXT: str w8, [sp, #288] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #206] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #400] +; NONEON-NOSVE-NEXT: str w8, [sp, #300] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #204] +; NONEON-NOSVE-NEXT: str wzr, [sp, #684] +; NONEON-NOSVE-NEXT: str w8, [sp, #296] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #176] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #288] +; NONEON-NOSVE-NEXT: str wzr, [sp, #676] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #240] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #182] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #180] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #248] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #186] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #416] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #240] +; NONEON-NOSVE-NEXT: str w8, [sp, #260] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #184] +; NONEON-NOSVE-NEXT: str w8, [sp, #256] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #190] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #368] +; 
NONEON-NOSVE-NEXT: str w8, [sp, #268] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #188] +; NONEON-NOSVE-NEXT: str w8, [sp, #264] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #226] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #256] +; NONEON-NOSVE-NEXT: str w8, [sp, #340] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #224] +; NONEON-NOSVE-NEXT: str w8, [sp, #336] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #230] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #384] +; NONEON-NOSVE-NEXT: str w8, [sp, #348] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #228] +; NONEON-NOSVE-NEXT: str w8, [sp, #344] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #234] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #336] +; NONEON-NOSVE-NEXT: str w8, [sp, #356] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #232] +; NONEON-NOSVE-NEXT: str w8, [sp, #352] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #238] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #464] +; NONEON-NOSVE-NEXT: str w8, [sp, #364] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #236] +; NONEON-NOSVE-NEXT: str w8, [sp, #360] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #210] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #352] +; NONEON-NOSVE-NEXT: str w8, [sp, #308] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #208] +; NONEON-NOSVE-NEXT: str w8, [sp, #304] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #214] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #480] +; NONEON-NOSVE-NEXT: str w8, [sp, #316] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #212] +; NONEON-NOSVE-NEXT: str w8, [sp, #312] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #218] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #304] +; NONEON-NOSVE-NEXT: str w8, [sp, #324] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #216] +; NONEON-NOSVE-NEXT: str w8, [sp, #320] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #222] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #432] +; NONEON-NOSVE-NEXT: str w8, [sp, #332] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #220] +; NONEON-NOSVE-NEXT: str w8, [sp, #328] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #404] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #320] +; NONEON-NOSVE-NEXT: str w8, [sp, #568] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #400] +; NONEON-NOSVE-NEXT: 
str w8, [sp, #560] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #412] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #448] +; NONEON-NOSVE-NEXT: str w8, [sp, #584] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #408] +; NONEON-NOSVE-NEXT: str w8, [sp, #576] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #420] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #560] +; NONEON-NOSVE-NEXT: str w8, [sp, #600] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #416] +; NONEON-NOSVE-NEXT: str w8, [sp, #592] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #428] +; NONEON-NOSVE-NEXT: str w8, [sp, #616] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #424] +; NONEON-NOSVE-NEXT: str w8, [sp, #608] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #372] +; NONEON-NOSVE-NEXT: ldp q2, q3, [sp, #592] +; NONEON-NOSVE-NEXT: str w8, [sp, #504] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #368] +; NONEON-NOSVE-NEXT: str w8, [sp, #496] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #380] +; NONEON-NOSVE-NEXT: str w8, [sp, #520] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #376] +; NONEON-NOSVE-NEXT: str w8, [sp, #512] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #388] +; NONEON-NOSVE-NEXT: ldp q4, q5, [sp, #496] +; NONEON-NOSVE-NEXT: str w8, [sp, #536] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #384] +; NONEON-NOSVE-NEXT: str w8, [sp, #528] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #396] +; NONEON-NOSVE-NEXT: str w8, [sp, #552] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #392] +; NONEON-NOSVE-NEXT: str w8, [sp, #544] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #468] +; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #528] +; NONEON-NOSVE-NEXT: str w8, [sp, #696] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #464] +; NONEON-NOSVE-NEXT: str w8, [sp, #688] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #476] +; NONEON-NOSVE-NEXT: str w8, [sp, #712] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #472] +; NONEON-NOSVE-NEXT: str w8, [sp, #704] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #484] +; NONEON-NOSVE-NEXT: ldp q16, q17, [sp, #688] +; NONEON-NOSVE-NEXT: str w8, [sp, #728] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #480] +; NONEON-NOSVE-NEXT: str w8, [sp, #720] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #492] +; NONEON-NOSVE-NEXT: str 
w8, [sp, #744] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #488] +; NONEON-NOSVE-NEXT: str w8, [sp, #736] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #436] +; NONEON-NOSVE-NEXT: ldp q19, q20, [sp, #720] +; NONEON-NOSVE-NEXT: str w8, [sp, #632] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #432] +; NONEON-NOSVE-NEXT: str w8, [sp, #624] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #444] +; NONEON-NOSVE-NEXT: str w8, [sp, #648] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #440] +; NONEON-NOSVE-NEXT: str w8, [sp, #640] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #452] +; NONEON-NOSVE-NEXT: ldp q22, q23, [sp, #624] +; NONEON-NOSVE-NEXT: str w8, [sp, #664] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #448] +; NONEON-NOSVE-NEXT: str w8, [sp, #656] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #460] +; NONEON-NOSVE-NEXT: str w8, [sp, #680] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #456] +; NONEON-NOSVE-NEXT: str w8, [sp, #672] +; NONEON-NOSVE-NEXT: ldp q21, q18, [sp, #656] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1, #32] +; NONEON-NOSVE-NEXT: stp q4, q5, [x1, #64] +; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #96] +; NONEON-NOSVE-NEXT: stp q16, q17, [x1, #128] +; NONEON-NOSVE-NEXT: stp q19, q20, [x1, #160] +; NONEON-NOSVE-NEXT: stp q22, q23, [x1, #192] +; NONEON-NOSVE-NEXT: stp q21, q18, [x1, #224] +; NONEON-NOSVE-NEXT: add sp, sp, #752 +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a %c = zext <32 x i8> %b to <32 x i64> @@ -766,6 +3613,29 @@ define void @zext_v8i16_v8i32(<8 x i16> %a, ptr %out) { ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; 
+; NONEON-NOSVE-LABEL: zext_v8i16_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #18] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %b = zext <8 x i16> %a to <8 x i32> store <8 x i32>%b, ptr %out ret void @@ -786,6 +3656,95 @@ define void @zext_v16i16_v16i32(ptr %in, ptr %out) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v16i16_v16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #160 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w3, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w5, [sp] +; NONEON-NOSVE-NEXT: ldrh w2, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w4, [sp, #14] +; NONEON-NOSVE-NEXT: add w13, w13, w13 +; NONEON-NOSVE-NEXT: add w14, w14, w14 +; NONEON-NOSVE-NEXT: ldrh w18, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w0, [sp, #10] +; NONEON-NOSVE-NEXT: strh w14, [sp, #46] +; NONEON-NOSVE-NEXT: add w14, w3, w3 +; NONEON-NOSVE-NEXT: strh w13, [sp, #44] +; NONEON-NOSVE-NEXT: add w13, w5, w5 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: strh w14, [sp, 
#42] +; NONEON-NOSVE-NEXT: add w14, w4, w4 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26] +; NONEON-NOSVE-NEXT: strh w13, [sp, #40] +; NONEON-NOSVE-NEXT: add w13, w2, w2 +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #22] +; NONEON-NOSVE-NEXT: strh w14, [sp, #38] +; NONEON-NOSVE-NEXT: add w14, w0, w0 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: strh w13, [sp, #36] +; NONEON-NOSVE-NEXT: add w13, w18, w18 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w14, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #30] +; NONEON-NOSVE-NEXT: strh w13, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #18] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #20] +; NONEON-NOSVE-NEXT: strh w9, [sp, #50] +; NONEON-NOSVE-NEXT: add w14, w17, w17 +; NONEON-NOSVE-NEXT: add w12, w12, w12 +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: add w13, w16, w16 +; NONEON-NOSVE-NEXT: add w11, w11, w11 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: add w10, w10, w10 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #76] +; NONEON-NOSVE-NEXT: strh w14, [sp, #62] +; NONEON-NOSVE-NEXT: add w14, w15, w15 +; NONEON-NOSVE-NEXT: strh w13, [sp, #60] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #72] +; NONEON-NOSVE-NEXT: strh w14, [sp, #58] +; NONEON-NOSVE-NEXT: strh w12, [sp, #56] +; NONEON-NOSVE-NEXT: strh w11, [sp, #54] +; NONEON-NOSVE-NEXT: strh w10, [sp, #52] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #68] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #64] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] +; 
NONEON-NOSVE-NEXT: ldrh w9, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #96] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #88] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #84] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #80] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: ret %a = load <16 x i16>, ptr %in %b = add <16 x i16> %a, %a %c = zext <16 x i16> %b to <16 x i32> @@ -807,6 +3766,30 @@ define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) { ; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v4i16_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #72] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #64] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #56] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %b = zext <4 x i16> %a to <4 x i64> store <4 x i64>%b, 
ptr %out ret void @@ -828,6 +3811,47 @@ define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) { ; CHECK-NEXT: stp q2, q1, [x0] ; CHECK-NEXT: stp q3, q0, [x0, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v8i16_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-160]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #18] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #152] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #144] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #80] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #136] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #128] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #72] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #120] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #112] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #104] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q2, q3, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: ret %b = zext <8 x i16> %a to <8 x i64> store <8 x i64>%b, ptr %out ret void @@ -862,6 +3886,148 @@ define void 
@zext_v16i16_v16i64(ptr %in, ptr %out) { ; CHECK-NEXT: stp q6, q0, [x1, #96] ; CHECK-NEXT: stp q7, q1, [x1, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v16i16_v16i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #368 +; NONEON-NOSVE-NEXT: str x29, [sp, #352] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 368 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: str wzr, [sp, #268] +; NONEON-NOSVE-NEXT: str wzr, [sp, #260] +; NONEON-NOSVE-NEXT: ldr x29, [sp, #352] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: str wzr, [sp, #284] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w3, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w5, [sp] +; NONEON-NOSVE-NEXT: ldrh w2, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w4, [sp, #14] +; NONEON-NOSVE-NEXT: add w13, w13, w13 +; NONEON-NOSVE-NEXT: add w14, w14, w14 +; NONEON-NOSVE-NEXT: ldrh w18, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w0, [sp, #10] +; NONEON-NOSVE-NEXT: strh w14, [sp, #54] +; NONEON-NOSVE-NEXT: add w14, w3, w3 +; NONEON-NOSVE-NEXT: strh w13, [sp, #52] +; NONEON-NOSVE-NEXT: add w13, w5, w5 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: strh w14, [sp, #50] +; NONEON-NOSVE-NEXT: add w14, w4, w4 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26] +; NONEON-NOSVE-NEXT: strh w13, [sp, #48] +; NONEON-NOSVE-NEXT: add w13, w2, w2 +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #22] +; NONEON-NOSVE-NEXT: strh w14, [sp, #46] +; NONEON-NOSVE-NEXT: add w14, w0, w0 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: strh w13, [sp, #44] +; NONEON-NOSVE-NEXT: add w13, w18, w18 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w14, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #30] +; NONEON-NOSVE-NEXT: strh w13, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, 
#18] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #40] +; NONEON-NOSVE-NEXT: add w14, w17, w17 +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #20] +; NONEON-NOSVE-NEXT: strh w9, [sp, #58] +; NONEON-NOSVE-NEXT: add w12, w12, w12 +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: add w11, w11, w11 +; NONEON-NOSVE-NEXT: add w10, w10, w10 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #72] +; NONEON-NOSVE-NEXT: add w13, w16, w16 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #80] +; NONEON-NOSVE-NEXT: strh w14, [sp, #70] +; NONEON-NOSVE-NEXT: add w14, w15, w15 +; NONEON-NOSVE-NEXT: strh w13, [sp, #68] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #84] +; NONEON-NOSVE-NEXT: strh w14, [sp, #66] +; NONEON-NOSVE-NEXT: strh w12, [sp, #64] +; NONEON-NOSVE-NEXT: strh w11, [sp, #62] +; NONEON-NOSVE-NEXT: strh w10, [sp, #60] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #56] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #72] +; NONEON-NOSVE-NEXT: str wzr, [sp, #276] +; NONEON-NOSVE-NEXT: str wzr, [sp, #332] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #76] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #88] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #120] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #98] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #96] +; NONEON-NOSVE-NEXT: str wzr, [sp, #324] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #102] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #100] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #184] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #104] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #88] +; NONEON-NOSVE-NEXT: str wzr, [sp, #348] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] +; 
NONEON-NOSVE-NEXT: ldrh w9, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #92] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #168] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #152] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #184] +; NONEON-NOSVE-NEXT: str d0, [sp, #360] +; NONEON-NOSVE-NEXT: ldp d2, d0, [sp, #136] +; NONEON-NOSVE-NEXT: str wzr, [sp, #340] +; NONEON-NOSVE-NEXT: str w9, [sp, #264] +; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #252] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #192] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] +; NONEON-NOSVE-NEXT: str d2, [sp, #200] +; NONEON-NOSVE-NEXT: str w9, [sp, #280] +; NONEON-NOSVE-NEXT: str w8, [sp, #272] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #168] +; NONEON-NOSVE-NEXT: str wzr, [sp, #300] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #256] +; NONEON-NOSVE-NEXT: str wzr, [sp, #292] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #232] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #224] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #176] +; NONEON-NOSVE-NEXT: str wzr, [sp, #316] +; NONEON-NOSVE-NEXT: str wzr, [sp, #308] +; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #244] +; NONEON-NOSVE-NEXT: str w8, [sp, #240] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #216] +; NONEON-NOSVE-NEXT: ldp q3, q4, [sp, #224] +; NONEON-NOSVE-NEXT: str w8, [sp, #320] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #364] +; NONEON-NOSVE-NEXT: str w9, [sp, #328] +; NONEON-NOSVE-NEXT: str w8, [sp, #344] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #360] +; NONEON-NOSVE-NEXT: str w8, [sp, #336] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #200] +; NONEON-NOSVE-NEXT: ldp q6, q7, [sp, #320] +; NONEON-NOSVE-NEXT: str w9, [sp, #296] +; NONEON-NOSVE-NEXT: str w8, [sp, #288] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #208] +; NONEON-NOSVE-NEXT: str w9, [sp, #312] +; NONEON-NOSVE-NEXT: str w8, [sp, #304] +; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #288] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32] +; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64] +; NONEON-NOSVE-NEXT: 
stp q5, q2, [x1, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #368 +; NONEON-NOSVE-NEXT: ret %a = load <16 x i16>, ptr %in %b = add <16 x i16> %a, %a %c = zext <16 x i16> %b to <16 x i64> @@ -882,6 +4048,23 @@ define void @zext_v4i32_v4i64(<4 x i32> %a, ptr %out) { ; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v4i32_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #56] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #48] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #40] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %b = zext <4 x i32> %a to <4 x i64> store <4 x i64>%b, ptr %out ret void @@ -902,6 +4085,51 @@ define void @zext_v8i32_v8i64(ptr %in, ptr %out) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v8i32_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #160 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] +; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #16] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: add w9, w13, w13 +; NONEON-NOSVE-NEXT: add w8, w12, w12 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: add w9, w15, w15 +; NONEON-NOSVE-NEXT: add w8, w14, w14 +; NONEON-NOSVE-NEXT: ldp w10, w11, [sp, #24] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, 
#32] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: add w9, w11, w11 +; NONEON-NOSVE-NEXT: add w8, w10, w10 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #72] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #120] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #112] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #104] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #96] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #96] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #152] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #144] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #80] +; NONEON-NOSVE-NEXT: stp w9, wzr, [sp, #136] +; NONEON-NOSVE-NEXT: stp w8, wzr, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: ret %a = load <8 x i32>, ptr %in %b = add <8 x i32> %a, %a %c = zext <8 x i32> %b to <8 x i64> @@ -928,6 +4156,21 @@ define void @extend_and_mul(i32 %0, <2 x i64> %1, ptr %2) { ; SVE2-NEXT: mul z0.d, z1.d, z0.d ; SVE2-NEXT: str q0, [x1] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: extend_and_mul: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: mov w9, w0 +; NONEON-NOSVE-NEXT: mul x10, x9, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: mul x8, x9, x8 +; NONEON-NOSVE-NEXT: stp x8, x10, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %broadcast.splatinsert2 = insertelement <2 x i32> poison, i32 %0, i64 0 %broadcast.splat3 = shufflevector <2 x i32> %broadcast.splatinsert2, <2 x i32> poison, <2 x i32> zeroinitializer %4 = zext <2 x i32> %broadcast.splat3 to <2 x i64> @@ -943,6 +4186,16 @@ define void @extend_no_mul(i32 %0, <2 x i64> %1, ptr %2) { ; CHECK-NEXT: mov z0.d, x8 ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extend_no_mul: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: mov w8, w0 +; NONEON-NOSVE-NEXT: stp x8, x8, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr q0, [sp] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret entry: %broadcast.splatinsert2 = insertelement <2 x i32> poison, i32 %0, i64 0 %broadcast.splat3 = shufflevector <2 x i32> %broadcast.splatinsert2, <2 x i32> poison, <2 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll index 54276bb4ba01d..1f5bb5f5486af 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s 
--check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -22,6 +23,112 @@ define void @add_v32i8(ptr %a) { ; CHECK-NEXT: add z1.b, z1.b, #7 // =0x7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; 
NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb 
w8, [sp, #1] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: add w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i32 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -38,6 +145,64 @@ define void @add_v16i16(ptr %a) { ; CHECK-NEXT: add z1.h, z1.h, #15 // =0xf ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: add w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: add w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: add w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: add w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: add w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: add w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: add w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: add 
w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: add w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: add w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: add w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: add w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: add w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: add w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -54,6 +219,36 @@ define void @add_v8i32(ptr %a) { ; CHECK-NEXT: add z1.s, z1.s, #31 // =0x1f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: add w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: add w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: add w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: add w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: add w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: add w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -70,6 +265,26 @@ define void @add_v4i64(ptr %a) { ; CHECK-NEXT: add z1.d, z1.d, #63 // =0x3f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: add x9, x8, #63 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: add x8, x8, #63 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: add x9, x8, #63 +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: add x8, x8, #63 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -90,6 +305,112 @@ define void @and_v32i8(ptr %a) { ; CHECK-NEXT: and z1.b, z1.b, #0x7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; 
NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: and w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, 
[x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i32 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -106,6 +427,64 @@ define void @and_v16i16(ptr %a) { ; CHECK-NEXT: and z1.h, z1.h, #0xf ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: and w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: and w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: and w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: and w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: and w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: and w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: and w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: and w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: and w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: and w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: and w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: and w8, w8, #0xf +; NONEON-NOSVE-NEXT: 
strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: and w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: and w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: and w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: and w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -122,6 +501,36 @@ define void @and_v8i32(ptr %a) { ; CHECK-NEXT: and z1.s, z1.s, #0x1f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: and w9, w8, #0x1f +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: and w8, w8, #0x1f +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: and w9, w8, #0x1f +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: and w8, w8, #0x1f +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: and w9, w8, #0x1f +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: and w8, w8, #0x1f +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: and w9, w8, #0x1f +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: and w8, w8, #0x1f +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -138,6 +547,26 @@ define void @and_v4i64(ptr %a) { ; CHECK-NEXT: and z1.d, z1.d, #0x3f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: and x9, x8, #0x3f +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: and x8, x8, #0x3f +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: and x9, x8, #0x3f +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: and x8, x8, #0x3f +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -158,6 +587,112 @@ define void @ashr_v32i8(ptr %a) { ; CHECK-NEXT: asr z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsb w8, 
[sp, #15] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #14] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #13] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #12] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #11] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #10] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #9] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #8] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #7] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #6] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #5] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #4] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #3] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #2] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #1] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp] +; NONEON-NOSVE-NEXT: lsr w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; 
NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i32 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -174,6 +709,64 @@ define void @ashr_v16i16(ptr %a) { ; CHECK-NEXT: asr z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30] +; NONEON-NOSVE-NEXT: lsr w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsr w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26] +; NONEON-NOSVE-NEXT: lsr w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsr w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: lsr w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsr w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: lsr w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsr w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #14] +; NONEON-NOSVE-NEXT: lsr w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12] +; NONEON-NOSVE-NEXT: lsr w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #10] +; NONEON-NOSVE-NEXT: lsr w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #8] +; NONEON-NOSVE-NEXT: lsr w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsh 
w8, [sp, #6] +; NONEON-NOSVE-NEXT: lsr w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #4] +; NONEON-NOSVE-NEXT: lsr w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #2] +; NONEON-NOSVE-NEXT: lsr w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp] +; NONEON-NOSVE-NEXT: lsr w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -190,6 +783,36 @@ define void @ashr_v8i32(ptr %a) { ; CHECK-NEXT: asr z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: asr w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: asr w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: asr w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: asr w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: asr w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: asr w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: asr w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: asr w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -206,6 +829,26 @@ define void @ashr_v4i64(ptr %a) { ; CHECK-NEXT: asr z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: asr x9, x8, #63 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: asr x8, x8, #63 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: asr x9, x8, #63 +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: asr x8, x8, #63 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -229,6 +872,144 @@ define void @icmp_eq_v32i8(ptr %a) { ; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; 
NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; 
NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: cmp w8, #7 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -249,6 +1030,80 @@ define void @icmp_sge_v16i16(ptr %a) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_sge_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w8, #14 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w8, #14 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w8, #14 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w8, #14 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w8, #14 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w8, #14 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w8, #14 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w8, #14 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, #14 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, #14 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, #14 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, #14 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, #14 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: 
strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, #14 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w8, #14 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp] +; NONEON-NOSVE-NEXT: cmp w8, #14 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -269,6 +1124,44 @@ define void @icmp_sgt_v8i32(ptr %a) { ; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_sgt_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmn w8, #8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: csetm w9, gt +; NONEON-NOSVE-NEXT: cmn w8, #8 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmn w8, #8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: csetm w9, gt +; NONEON-NOSVE-NEXT: cmn w8, #8 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: cmn w8, #8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: csetm w9, gt +; NONEON-NOSVE-NEXT: cmn w8, #8 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: cmn w8, #8 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: csetm w9, gt +; NONEON-NOSVE-NEXT: cmn w8, #8 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 -8, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -289,6 +1182,30 @@ define void @icmp_ult_v4i64(ptr %a) { ; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_ult_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x8, #63 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: csetm x9, lo +; NONEON-NOSVE-NEXT: cmp x8, #63 +; NONEON-NOSVE-NEXT: csetm x8, lo +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp x8, #63 +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: csetm x9, lo +; NONEON-NOSVE-NEXT: cmp x8, #63 +; NONEON-NOSVE-NEXT: csetm x8, lo +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -310,6 +1227,112 @@ define void @lshr_v32i8(ptr %a) { ; CHECK-NEXT: lsr z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #7, #1 +; NONEON-NOSVE-NEXT: strb w8, 
[sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -326,6 +1349,64 @@ define void @lshr_v16i16(ptr %a) { ; CHECK-NEXT: lsr z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #15, #1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #15, #1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #15, #1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #15, #1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #15, #1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #15, #1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #15, #1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #15, #1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #15, #1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #15, #1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #15, #1 +; 
NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #15, #1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #15, #1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #15, #1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #15, #1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: ubfx w8, w8, #15, #1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -342,6 +1423,36 @@ define void @lshr_v8i32(ptr %a) { ; CHECK-NEXT: lsr z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsr w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsr w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsr w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsr w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: lsr w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: lsr w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: lsr w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: lsr w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -358,6 +1469,26 @@ define void @lshr_v4i64(ptr %a) { ; CHECK-NEXT: lsr z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: lsr x9, x8, #63 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: lsr x8, x8, #63 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: lsr x9, x8, #63 +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: lsr x8, x8, #63 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -378,6 +1509,144 @@ define void @mul_v32i8(ptr %a) { ; CHECK-NEXT: mul z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; 
NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; 
NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: lsl w9, w8, #3 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -394,6 +1663,80 @@ define void @mul_v16i16(ptr %a) { ; CHECK-NEXT: mul z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: lsl w9, w8, #4 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsl w9, w8, #4 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: lsl w9, w8, #4 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsl w9, w8, #4 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: lsl w9, w8, #4 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsl w9, w8, #4 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: lsl w9, w8, #4 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl w9, w8, #4 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: lsl w9, w8, #4 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: lsl w9, w8, #4 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: lsl w9, w8, #4 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: lsl w9, w8, #4 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: lsl w9, w8, #4 +; 
NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: lsl w9, w8, #4 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: lsl w9, w8, #4 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: lsl w9, w8, #4 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -410,6 +1753,48 @@ define void @mul_v8i32(ptr %a) { ; CHECK-NEXT: mul z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsl w9, w8, #5 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsl w9, w8, #5 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsl w9, w8, #5 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl w9, w8, #5 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: lsl w9, w8, #5 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: lsl w9, w8, #5 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: lsl w9, w8, #5 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: lsl w9, w8, #5 +; NONEON-NOSVE-NEXT: sub w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -426,6 +1811,32 @@ define void @mul_v4i64(ptr %a) { ; CHECK-NEXT: mul z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: lsl x9, x8, #6 +; NONEON-NOSVE-NEXT: sub x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #56] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl x9, x8, #6 +; NONEON-NOSVE-NEXT: sub x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: lsl x9, x8, #6 +; NONEON-NOSVE-NEXT: sub x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: lsl x9, x8, #6 +; NONEON-NOSVE-NEXT: sub x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -446,6 +1857,112 @@ define void @or_v32i8(ptr %a) { ; CHECK-NEXT: orr z1.b, z1.b, #0x7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; 
NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, 
[x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -462,6 +1979,64 @@ define void @or_v16i16(ptr %a) { ; CHECK-NEXT: orr z1.h, z1.h, #0xf ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: orr w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: orr w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: orr w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: orr w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: orr w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: orr w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: orr w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: orr w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: orr w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: orr w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: orr w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: orr w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh 
w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: orr w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: orr w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: orr w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: orr w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -478,6 +2053,36 @@ define void @or_v8i32(ptr %a) { ; CHECK-NEXT: orr z1.s, z1.s, #0x1f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: orr w9, w8, #0x1f +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x1f +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: orr w9, w8, #0x1f +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x1f +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: orr w9, w8, #0x1f +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x1f +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: orr w9, w8, #0x1f +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: orr w8, w8, #0x1f +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -494,6 +2099,26 @@ define void @or_v4i64(ptr %a) { ; CHECK-NEXT: orr z1.d, z1.d, #0x3f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: orr x9, x8, #0x3f +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: orr x8, x8, #0x3f +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: orr x9, x8, #0x3f +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: orr x8, x8, #0x3f +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -514,6 +2139,112 @@ define void @shl_v32i8(ptr %a) { ; CHECK-NEXT: lsl z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; 
NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: lsl w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = 
load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -530,6 +2261,64 @@ define void @shl_v16i16(ptr %a) { ; CHECK-NEXT: lsl z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: lsl w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsl w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: lsl w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsl w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: lsl w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsl w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: lsl w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: lsl w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: lsl w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: lsl w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: lsl w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: lsl w8, 
w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: lsl w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: lsl w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: lsl w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -546,6 +2335,36 @@ define void @shl_v8i32(ptr %a) { ; CHECK-NEXT: lsl z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsl w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsl w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsl w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: lsl w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: lsl w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: lsl w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: lsl w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -562,6 +2381,26 @@ define void @shl_v4i64(ptr %a) { ; CHECK-NEXT: lsl z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: lsl x9, x8, #63 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl x8, x8, #63 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: lsl x9, x8, #63 +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: lsl x8, x8, #63 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -582,6 +2421,145 @@ define void @smax_v32i8(ptr %a) { ; CHECK-NEXT: smax z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: mov w8, #7 // =0x7 +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #31] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #63] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #29] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #61] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #60] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #27] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #59] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #58] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #25] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #57] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #23] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #55] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #54] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #53] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #52] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #19] +; 
NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #51] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #50] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #49] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #47] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #45] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #43] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #41] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #7] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, 
[sp, #39] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #5] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #37] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #3] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #35] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #1] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w9, [sp, #33] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -598,6 +2576,81 @@ define void @smax_v16i16(ptr %a) { ; CHECK-NEXT: smax z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strh w9, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strh w9, [sp, #60] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strh w9, [sp, #58] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strh w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strh w9, [sp, #54] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strh w9, [sp, #52] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strh w9, [sp, #50] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strh w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strh w9, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strh w9, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strh w9, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strh w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #6] +; 
NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strh w9, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strh w9, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: strh w9, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -614,6 +2667,45 @@ define void @smax_v8i32(ptr %a) { ; CHECK-NEXT: smax z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w10, w9, w8, gt +; NONEON-NOSVE-NEXT: ldr w9, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w10, w9, w8, gt +; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w10, w9, w8, gt +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, gt +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w10, w9, w8, gt +; NONEON-NOSVE-NEXT: ldr w9, [sp] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -630,6 +2722,31 @@ define void @smax_v4i64(ptr %a) { ; CHECK-NEXT: smax z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x9, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x9, #63 +; NONEON-NOSVE-NEXT: csel x10, x9, x8, gt +; NONEON-NOSVE-NEXT: ldr x9, [sp, #16] +; NONEON-NOSVE-NEXT: cmp x9, #63 +; NONEON-NOSVE-NEXT: csel x9, x9, x8, gt +; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #8] +; NONEON-NOSVE-NEXT: cmp x9, #63 +; NONEON-NOSVE-NEXT: csel x10, x9, x8, gt +; NONEON-NOSVE-NEXT: ldr x9, [sp] +; NONEON-NOSVE-NEXT: cmp x9, #63 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, gt +; NONEON-NOSVE-NEXT: stp x8, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -650,6 +2767,145 @@ define void @smin_v32i8(ptr %a) { ; CHECK-NEXT: smin z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: mov w8, #7 // =0x7 +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #31] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #63] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #29] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #61] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #60] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #27] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #59] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #58] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #25] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #57] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #23] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #55] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #54] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #53] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #52] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #19] +; 
NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #51] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #50] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #49] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #47] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #45] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #43] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #41] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #7] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, 
[sp, #39] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #5] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #37] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #3] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #35] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #1] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w9, [sp, #33] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -666,6 +2922,81 @@ define void @smin_v16i16(ptr %a) { ; CHECK-NEXT: smin z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strh w9, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strh w9, [sp, #60] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strh w9, [sp, #58] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strh w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strh w9, [sp, #54] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strh w9, [sp, #52] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strh w9, [sp, #50] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strh w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strh w9, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strh w9, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strh w9, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strh w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #6] +; 
NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strh w9, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strh w9, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: strh w9, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -682,6 +3013,45 @@ define void @smin_v8i32(ptr %a) { ; CHECK-NEXT: smin z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w10, w9, w8, lt +; NONEON-NOSVE-NEXT: ldr w9, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w10, w9, w8, lt +; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w10, w9, w8, lt +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lt +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w10, w9, w8, lt +; NONEON-NOSVE-NEXT: ldr w9, [sp] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -698,6 +3068,31 @@ define void @smin_v4i64(ptr %a) { ; CHECK-NEXT: smin z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x9, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x9, #63 +; NONEON-NOSVE-NEXT: csel x10, x9, x8, lt +; NONEON-NOSVE-NEXT: ldr x9, [sp, #16] +; NONEON-NOSVE-NEXT: cmp x9, #63 +; NONEON-NOSVE-NEXT: csel x9, x9, x8, lt +; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #8] +; NONEON-NOSVE-NEXT: cmp x9, #63 +; NONEON-NOSVE-NEXT: csel x10, x9, x8, lt +; NONEON-NOSVE-NEXT: ldr x9, [sp] +; NONEON-NOSVE-NEXT: cmp x9, #63 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, lt +; NONEON-NOSVE-NEXT: stp x8, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -718,6 +3113,112 @@ define void @sub_v32i8(ptr %a) { ; CHECK-NEXT: sub z1.b, z1.b, #7 // =0x7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; 
NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: sub w8, w8, #7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = 
load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -734,6 +3235,64 @@ define void @sub_v16i16(ptr %a) { ; CHECK-NEXT: sub z1.h, z1.h, #15 // =0xf ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: sub w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: sub w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: sub w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: sub w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: sub w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: sub w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: sub w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: sub w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: sub w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: sub w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: sub w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: sub w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: 
sub w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: sub w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: sub w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: sub w8, w8, #15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -750,6 +3309,36 @@ define void @sub_v8i32(ptr %a) { ; CHECK-NEXT: sub z1.s, z1.s, #31 // =0x1f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: sub w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: sub w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: sub w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: sub w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: sub w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: sub w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: sub w9, w8, #31 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: sub w8, w8, #31 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -766,6 +3355,26 @@ define void @sub_v4i64(ptr %a) { ; CHECK-NEXT: sub z1.d, z1.d, #63 // =0x3f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: sub x9, x8, #63 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: sub x8, x8, #63 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: sub x9, x8, #63 +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: sub x8, x8, #63 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -786,6 +3395,145 @@ define void @umax_v32i8(ptr %a) { ; CHECK-NEXT: umax z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: mov w8, #7 // =0x7 +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #31] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #30] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #29] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #28] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #27] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #26] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #25] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #24] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #23] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #22] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #21] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #20] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w9, 
[sp, #19] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #18] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #17] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #16] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, 
w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w9, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: tst w9, #0xf8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -802,6 +3550,81 @@ define void @umax_v16i16(ptr %a) { ; CHECK-NEXT: umax z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #30] +; NONEON-NOSVE-NEXT: tst w9, #0xfff0 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strh w9, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #28] +; NONEON-NOSVE-NEXT: tst w9, #0xfff0 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strh w9, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26] +; NONEON-NOSVE-NEXT: tst w9, #0xfff0 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strh w9, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #24] +; NONEON-NOSVE-NEXT: tst w9, #0xfff0 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strh w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #22] +; NONEON-NOSVE-NEXT: tst w9, #0xfff0 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strh w9, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #20] +; NONEON-NOSVE-NEXT: tst w9, #0xfff0 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strh w9, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #18] +; NONEON-NOSVE-NEXT: tst w9, #0xfff0 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strh w9, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #16] +; NONEON-NOSVE-NEXT: tst w9, #0xfff0 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strh w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: tst w9, #0xfff0 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strh w9, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: tst w9, #0xfff0 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strh w9, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: tst w9, #0xfff0 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strh w9, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: tst w9, #0xfff0 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strh w9, [sp, #40] +; 
NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: tst w9, #0xfff0 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strh w9, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: tst w9, #0xfff0 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strh w9, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: tst w9, #0xfff0 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, ne +; NONEON-NOSVE-NEXT: strh w9, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: tst w9, #0xfff0 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -818,6 +3641,45 @@ define void @umax_v8i32(ptr %a) { ; CHECK-NEXT: umax z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w10, w9, w8, hi +; NONEON-NOSVE-NEXT: ldr w9, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, hi +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w10, w9, w8, hi +; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, hi +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w10, w9, w8, hi +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, hi +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w10, w9, w8, hi +; NONEON-NOSVE-NEXT: ldr w9, [sp] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -834,6 +3696,31 @@ define void @umax_v4i64(ptr %a) { ; CHECK-NEXT: umax z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x9, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x9, #63 +; NONEON-NOSVE-NEXT: csel x10, x9, x8, hi +; NONEON-NOSVE-NEXT: ldr x9, [sp, #16] +; NONEON-NOSVE-NEXT: cmp x9, #63 +; NONEON-NOSVE-NEXT: csel x9, x9, x8, hi +; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #8] +; NONEON-NOSVE-NEXT: cmp x9, #63 +; NONEON-NOSVE-NEXT: csel x10, x9, x8, hi +; NONEON-NOSVE-NEXT: ldr x9, [sp] +; NONEON-NOSVE-NEXT: cmp x9, #63 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, hi +; NONEON-NOSVE-NEXT: stp x8, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -854,6 +3741,145 @@ define void @umin_v32i8(ptr %a) { ; CHECK-NEXT: umin z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: mov w8, #7 // =0x7 +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #31] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #29] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #27] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #25] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #23] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w9, 
#7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w9, 
[sp, #6] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w9, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: cmp w9, #7 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -870,6 +3896,81 @@ define void @umin_v16i16(ptr %a) { ; CHECK-NEXT: umin z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strh w9, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strh w9, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strh w9, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strh w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strh w9, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strh w9, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strh w9, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strh w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strh w9, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strh w9, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strh w9, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strh w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; 
NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strh w9, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strh w9, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: strh w9, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: cmp w9, #15 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -886,6 +3987,45 @@ define void @umin_v8i32(ptr %a) { ; CHECK-NEXT: umin z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w10, w9, w8, lo +; NONEON-NOSVE-NEXT: ldr w9, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w10, w9, w8, lo +; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w10, w9, w8, lo +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w9, w9, w8, lo +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w10, w9, w8, lo +; NONEON-NOSVE-NEXT: ldr w9, [sp] +; NONEON-NOSVE-NEXT: cmp w9, #31 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -902,6 +4042,31 @@ define void @umin_v4i64(ptr %a) { ; CHECK-NEXT: umin z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x9, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x9, #63 +; NONEON-NOSVE-NEXT: csel x10, x9, x8, lo +; NONEON-NOSVE-NEXT: ldr x9, [sp, #16] +; NONEON-NOSVE-NEXT: cmp x9, #63 +; NONEON-NOSVE-NEXT: csel x9, x9, x8, lo +; NONEON-NOSVE-NEXT: stp x9, x10, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #8] +; NONEON-NOSVE-NEXT: cmp x9, #63 +; NONEON-NOSVE-NEXT: csel x10, x9, x8, lo +; NONEON-NOSVE-NEXT: ldr x9, [sp] +; NONEON-NOSVE-NEXT: cmp x9, #63 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, lo +; NONEON-NOSVE-NEXT: stp x8, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -922,6 +4087,112 @@ define void @xor_v32i8(ptr %a) { ; CHECK-NEXT: eor z1.b, z1.b, #0x7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; 
NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, 
[x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -938,6 +4209,64 @@ define void @xor_v16i16(ptr %a) { ; CHECK-NEXT: eor z1.h, z1.h, #0xf ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: eor w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: eor w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: eor w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: eor w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: eor w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: eor w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: eor w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: eor w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: eor w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: eor w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: eor w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: eor w8, w8, #0xf +; NONEON-NOSVE-NEXT: 
strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: eor w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: eor w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: eor w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: eor w8, w8, #0xf +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -954,6 +4283,36 @@ define void @xor_v8i32(ptr %a) { ; CHECK-NEXT: eor z1.s, z1.s, #0x1f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: eor w9, w8, #0x1f +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x1f +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: eor w9, w8, #0x1f +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x1f +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: eor w9, w8, #0x1f +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x1f +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: eor w9, w8, #0x1f +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: eor w8, w8, #0x1f +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -970,6 +4329,26 @@ define void @xor_v4i64(ptr %a) { ; CHECK-NEXT: eor z1.d, z1.d, #0x3f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: eor x9, x8, #0x3f +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: eor x8, x8, #0x3f +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: eor x9, x8, #0x3f +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: eor x8, x8, #0x3f +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll index 40824ba9ae9c5..3137a7bc7ad27 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -16,6 +17,47 @@ define <8 x i8> @and_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, 
#14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = and <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -28,6 +70,78 @@ define <16 x i8> @and_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] 
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = and <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -41,6 +155,147 @@ define void @and_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: and z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #47] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #62] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #60] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #59] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, 
#42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #58] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #56] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #54] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #52] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #50] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #48] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrb w8, 
[sp, #30] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] 
+; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = and <32 x i8> %op1, %op2 @@ -56,6 +311,31 @@ define <4 x i16> @and_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = and <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -68,6 +348,46 @@ define <8 x i16> @and_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v8i16: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = and <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -81,6 +401,83 @@ define void @and_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: and z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh 
w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #46] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: and 
w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = and <16 x i16> %op1, %op2 @@ -96,6 +493,22 @@ define <2 x i32> @and_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: and w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = and <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -108,6 +521,28 @@ define <4 x i32> @and_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: and w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: and w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = and <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -121,6 +556,47 @@ define void @and_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: and z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; NONEON-NOSVE-NEXT: and w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #32] +; NONEON-NOSVE-NEXT: str w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: and w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: str w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: and w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: and 
w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: str w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: and w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = and <8 x i32> %op1, %op2 @@ -136,6 +612,18 @@ define <1 x i64> @and_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: and x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = and <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -148,6 +636,21 @@ define <2 x i64> @and_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: and x8, x10, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: and x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = and <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -161,6 +664,33 @@ define void @and_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: and z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #56] +; NONEON-NOSVE-NEXT: and x8, x10, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #88] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: and x8, x9, x8 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: str x8, [sp, #80] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: and x8, x10, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #72] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: and x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = and <4 x i64> %op1, %op2 @@ -180,6 +710,47 @@ define <8 x i8> @or_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub 
sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = or <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -192,6 +763,78 @@ define <16 x i8> @or_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] 
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = or <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -205,6 +848,147 @@ define void @or_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: orr z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #47] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #62] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #60] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #59] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, 
#42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #58] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #56] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #54] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #52] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #50] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #48] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrb w8, 
[sp, #30] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] 
+; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = or <32 x i8> %op1, %op2 @@ -220,6 +1004,31 @@ define <4 x i16> @or_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = or <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -232,6 +1041,46 @@ define <8 x i16> @or_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: 
stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = or <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -245,6 +1094,83 @@ define void @or_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: orr z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62] +; 
NONEON-NOSVE-NEXT: ldrh w9, [sp, #46] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; 
NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = or <16 x i16> %op1, %op2 @@ -260,6 +1186,22 @@ define <2 x i32> @or_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: orr w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = or <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -272,6 +1214,28 @@ define <4 x i32> @or_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: orr w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: orr w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = or <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -285,6 +1249,47 @@ define void @or_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: orr z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; NONEON-NOSVE-NEXT: orr w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #32] +; NONEON-NOSVE-NEXT: str w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: orr w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: str w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: orr w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: orr w8, 
w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: str w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: orr w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = or <8 x i32> %op1, %op2 @@ -300,6 +1305,18 @@ define <1 x i64> @or_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: orr x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = or <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -312,6 +1329,21 @@ define <2 x i64> @or_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: orr x8, x10, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: orr x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = or <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -325,6 +1357,33 @@ define void @or_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: orr z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #56] +; NONEON-NOSVE-NEXT: orr x8, x10, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #88] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: orr x8, x9, x8 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: str x8, [sp, #80] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: orr x8, x10, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #72] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: orr x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = or <4 x i64> %op1, %op2 @@ -344,6 +1403,47 @@ define <8 x i8> @xor_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub 
sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = xor <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -356,6 +1456,78 @@ define <16 x i8> @xor_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] 
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = xor <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -369,6 +1541,147 @@ define void @xor_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: eor z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #47] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #62] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #60] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #59] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, 
#42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #58] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #56] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #54] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #52] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #50] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #48] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrb w8, 
[sp, #30] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] 
+; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = xor <32 x i8> %op1, %op2 @@ -384,6 +1697,31 @@ define <4 x i16> @xor_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = xor <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -396,6 +1734,46 @@ define <8 x i16> @xor_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v8i16: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = xor <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -409,6 +1787,83 @@ define void @xor_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: eor z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh 
w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #46] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: eor 
w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = xor <16 x i16> %op1, %op2 @@ -424,6 +1879,22 @@ define <2 x i32> @xor_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: eor w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = xor <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -436,6 +1907,28 @@ define <4 x i32> @xor_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: eor w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: eor w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = xor <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -449,6 +1942,47 @@ define void @xor_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: eor z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; NONEON-NOSVE-NEXT: eor w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #32] +; NONEON-NOSVE-NEXT: str w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: eor w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: str w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: eor w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: eor 
w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: str w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: eor w8, w10, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = xor <8 x i32> %op1, %op2 @@ -464,6 +1998,18 @@ define <1 x i64> @xor_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: eor x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = xor <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -476,6 +2022,21 @@ define <2 x i64> @xor_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: eor x8, x10, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: eor x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = xor <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -489,6 +2050,33 @@ define void @xor_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: eor z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #56] +; NONEON-NOSVE-NEXT: eor x8, x10, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #88] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: eor x8, x9, x8 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: str x8, [sp, #80] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: eor x8, x10, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #72] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: eor x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = xor <4 x i64> %op1, %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll index 74ee5482a60c4..4775a965b70d7 100644 --- 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -17,6 +18,55 @@ define <8 x i8> @smax_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] +; 
NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <8 x i8> @llvm.smax.v8i8(<8 x i8> %op1, <8 x i8> %op2) ret <8 x i8> %res } @@ -30,6 +80,94 @@ define <16 x i8> @smax_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt 
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %op1, <16 x i8> %op2) ret <16 x i8> %res } @@ -45,6 +183,179 @@ define void @smax_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: smax z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #47] +; NONEON-NOSVE-NEXT: cmp 
w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #62] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #61] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #60] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #59] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #58] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #57] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #55] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #54] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #53] +; NONEON-NOSVE-NEXT: cmp 
w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #52] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #51] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #50] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #49] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #48] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27] +; NONEON-NOSVE-NEXT: cmp 
w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w9, w8 
+; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = call <32 x i8> @llvm.smax.v32i8(<32 x i8> %op1, <32 x i8> %op2) @@ -61,6 +372,35 @@ define <4 x i16> @smax_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %op1, <4 x i16> %op2) ret <4 x i16> %res } @@ -74,6 +414,54 @@ define <8 x i16> @smax_v8i16(<8 x i16> 
%op1, <8 x i16> %op2) { ; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call 
<8 x i16> @llvm.smax.v8i16(<8 x i16> %op1, <8 x i16> %op2) ret <8 x i16> %res } @@ -89,6 +477,99 @@ define void @smax_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: smax z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #46] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #60] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #58] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #54] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #52] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #50] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; 
NONEON-NOSVE-NEXT: ldrsh w8, [sp, #48] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = call <16 x i16> @llvm.smax.v16i16(<16 
x i16> %op1, <16 x i16> %op2) @@ -105,6 +586,23 @@ define <2 x i32> @smax_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, gt +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %op1, <2 x i32> %op2) ret <2 x i32> %res } @@ -118,6 +616,30 @@ define <4 x i32> @smax_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, gt +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, gt +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %op1, <4 x i32> %op2) ret <4 x i32> %res } @@ -133,6 +655,51 @@ define void @smax_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: smax z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, gt +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, gt +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; 
NONEON-NOSVE-NEXT: stp w8, w11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, gt +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, gt +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %op1, <8 x i32> %op2) @@ -150,6 +717,19 @@ define <1 x i64> @smax_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, gt +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %op1, <1 x i64> %op2) ret <1 x i64> %res } @@ -164,6 +744,22 @@ define <2 x i64> @smax_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x10, x8 +; NONEON-NOSVE-NEXT: csel x11, x10, x8, gt +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, gt +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %op1, <2 x i64> %op2) ret <2 x i64> %res } @@ -179,6 +775,35 @@ define void @smax_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: smax z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp x10, x8 +; NONEON-NOSVE-NEXT: csel x11, x10, x8, gt +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, gt +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x10, x8 +; NONEON-NOSVE-NEXT: csel x11, x10, x8, gt +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, gt +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %op1, <4 x i64> %op2) @@ -199,6 +824,55 @@ define <8 x i8> @smin_v8i8(<8 x i8> 
%op1, <8 x i8> %op2) { ; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, 
#32 +; NONEON-NOSVE-NEXT: ret %res = call <8 x i8> @llvm.smin.v8i8(<8 x i8> %op1, <8 x i8> %op2) ret <8 x i8> %res } @@ -212,6 +886,94 @@ define <16 x i8> @smin_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; 
NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %op1, <16 x i8> %op2) ret <16 x i8> %res } @@ -227,6 +989,179 @@ define void @smin_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: smin z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, 
[x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #47] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #62] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #61] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #60] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #59] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #58] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #57] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: 
ldrsb w8, [sp, #55] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #54] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #53] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #52] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #51] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #50] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #49] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #48] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: 
ldrsb w8, [sp, #29] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrsb 
w8, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = call <32 x i8> @llvm.smin.v32i8(<32 x i8> %op1, <32 x i8> %op2) @@ -243,6 +1178,35 @@ define <4 x i16> @smin_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; 
NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i16> @llvm.smin.v4i16(<4 x i16> %op1, <4 x i16> %op2) ret <4 x i16> %res } @@ -256,6 +1220,54 @@ define <8 x i16> @smin_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; 
NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %op1, <8 x i16> %op2) ret <8 x i16> %res } @@ -271,6 +1283,99 @@ define void @smin_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: smin z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #46] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #60] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #58] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #54] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh 
w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #52] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #50] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #48] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, 
[sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = call <16 x i16> @llvm.smin.v16i16(<16 x i16> %op1, <16 x i16> %op2) @@ -287,6 +1392,23 @@ define <2 x i32> @smin_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, lt +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %op1, <2 x i32> %op2) ret <2 x i32> %res } @@ -300,6 +1422,30 @@ define <4 x i32> @smin_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, lt +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, lt +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %op1, <4 x i32> %op2) ret <4 x i32> %res } @@ -315,6 +1461,51 @@ define void @smin_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: smin z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, lt +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, lt +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; 
NONEON-NOSVE-NEXT: stp w8, w11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, lt +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, lt +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %op1, <8 x i32> %op2) @@ -332,6 +1523,19 @@ define <1 x i64> @smin_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, lt +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %op1, <1 x i64> %op2) ret <1 x i64> %res } @@ -346,6 +1550,22 @@ define <2 x i64> @smin_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x10, x8 +; NONEON-NOSVE-NEXT: csel x11, x10, x8, lt +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, lt +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %op1, <2 x i64> %op2) ret <2 x i64> %res } @@ -361,6 +1581,35 @@ define void @smin_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: smin z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp x10, x8 +; NONEON-NOSVE-NEXT: csel x11, x10, x8, lt +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, lt +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x10, x8 +; NONEON-NOSVE-NEXT: csel x11, x10, x8, lt +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, lt +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %op1, <4 x i64> %op2) @@ -381,6 +1630,55 @@ define <8 x i8> @umax_v8i8(<8 x i8> 
%op1, <8 x i8> %op2) { ; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; 
NONEON-NOSVE-NEXT: ret %res = call <8 x i8> @llvm.umax.v8i8(<8 x i8> %op1, <8 x i8> %op2) ret <8 x i8> %res } @@ -394,6 +1692,94 @@ define <16 x i8> @umax_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, 
w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %op1, <16 x i8> %op2) ret <16 x i8> %res } @@ -409,6 +1795,179 @@ define void @umax_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: umax z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
umax_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #47] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #62] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #60] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #59] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #58] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; 
NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #54] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #52] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #50] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #48] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: 
csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; 
NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %op1, <32 x i8> %op2) @@ -425,6 +1984,35 @@ define <4 x i16> @umax_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 
+; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i16> @llvm.umax.v4i16(<4 x i16> %op1, <4 x i16> %op2) ret <4 x i16> %res } @@ -438,6 +2026,54 @@ define <8 x i16> @umax_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: 
ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %op1, <8 x i16> %op2) ret <8 x i16> %res } @@ -453,6 +2089,99 @@ define void @umax_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: umax z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #46] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; 
NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, 
w9, w8, hi +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %op1, <16 x i16> %op2) @@ -469,6 +2198,23 @@ define <2 x i32> @umax_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, hi +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %op1, <2 x i32> %op2) ret <2 x i32> %res } @@ -482,6 +2228,30 @@ define <4 x i32> @umax_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, hi +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, hi +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %op1, <4 x i32> %op2) ret <4 x i32> %res } @@ -497,6 +2267,51 @@ define void @umax_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: umax z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, hi +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, hi +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; 
NONEON-NOSVE-NEXT: stp w8, w11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, hi +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, hi +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %op1, <8 x i32> %op2) @@ -514,6 +2329,19 @@ define <1 x i64> @umax_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, hi +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %op1, <1 x i64> %op2) ret <1 x i64> %res } @@ -528,6 +2356,22 @@ define <2 x i64> @umax_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x10, x8 +; NONEON-NOSVE-NEXT: csel x11, x10, x8, hi +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, hi +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %op1, <2 x i64> %op2) ret <2 x i64> %res } @@ -543,6 +2387,35 @@ define void @umax_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: umax z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp x10, x8 +; NONEON-NOSVE-NEXT: csel x11, x10, x8, hi +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, hi +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x10, x8 +; NONEON-NOSVE-NEXT: csel x11, x10, x8, hi +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, hi +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = call <4 x i64> @llvm.umax.v4i64(<4 x i64> %op1, <4 x i64> %op2) @@ -563,6 +2436,55 @@ define <8 x i8> @umin_v8i8(<8 x i8> 
%op1, <8 x i8> %op2) { ; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; 
NONEON-NOSVE-NEXT: ret %res = call <8 x i8> @llvm.umin.v8i8(<8 x i8> %op1, <8 x i8> %op2) ret <8 x i8> %res } @@ -576,6 +2498,94 @@ define <16 x i8> @umin_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, 
w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %op1, <16 x i8> %op2) ret <16 x i8> %res } @@ -591,6 +2601,179 @@ define void @umin_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: umin z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
umin_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #47] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #62] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #60] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #59] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #58] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; 
NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #54] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #52] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #50] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #48] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: 
csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; 
NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %op1, <32 x i8> %op2) @@ -607,6 +2790,35 @@ define <4 x i16> @umin_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 
+; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i16> @llvm.umin.v4i16(<4 x i16> %op1, <4 x i16> %op2) ret <4 x i16> %res } @@ -620,6 +2832,54 @@ define <8 x i16> @umin_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: 
ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %op1, <8 x i16> %op2) ret <8 x i16> %res } @@ -635,6 +2895,99 @@ define void @umin_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: umin z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #46] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; 
NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, 
w9, w8, lo +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = call <16 x i16> @llvm.umin.v16i16(<16 x i16> %op1, <16 x i16> %op2) @@ -651,6 +3004,23 @@ define <2 x i32> @umin_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, lo +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %op1, <2 x i32> %op2) ret <2 x i32> %res } @@ -664,6 +3034,30 @@ define <4 x i32> @umin_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, lo +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, lo +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %op1, <4 x i32> %op2) ret <4 x i32> %res } @@ -679,6 +3073,51 @@ define void @umin_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: umin z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, lo +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, lo +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; 
NONEON-NOSVE-NEXT: stp w8, w11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, lo +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w10, w8 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, lo +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %op1, <8 x i32> %op2) @@ -696,6 +3135,19 @@ define <1 x i64> @umin_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, lo +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %op1, <1 x i64> %op2) ret <1 x i64> %res } @@ -710,6 +3162,22 @@ define <2 x i64> @umin_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x10, x8 +; NONEON-NOSVE-NEXT: csel x11, x10, x8, lo +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, lo +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %op1, <2 x i64> %op2) ret <2 x i64> %res } @@ -725,6 +3193,35 @@ define void @umin_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: umin z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #56] +; NONEON-NOSVE-NEXT: cmp x10, x8 +; NONEON-NOSVE-NEXT: csel x11, x10, x8, lo +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, lo +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x10, x8 +; NONEON-NOSVE-NEXT: csel x11, x10, x8, lo +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, lo +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %op1, <4 x i64> %op2) diff --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll index 3ff6983210a0a..94d5bb1543b0e 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sme-fa64 -force-streaming-compatible < %s | FileCheck %s -check-prefix=FA64 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s -check-prefix=NO-FA64 +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -20,6 +21,55 @@ define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) { ; NO-FA64-NEXT: mad z0.b, p0/m, z1.b, z2.b ; NO-FA64-NEXT: // kill: def $d0 killed $d0 killed $z0 ; NO-FA64-NEXT: ret +; +; NONEON-NOSVE-LABEL: mla8xi8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrb w1, [sp, #15] +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #7] +; NONEON-NOSVE-NEXT: str d2, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w5, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w3, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w4, [sp, #6] +; NONEON-NOSVE-NEXT: madd w1, w2, w1, w5 +; NONEON-NOSVE-NEXT: ldrb w18, [sp, #13] +; NONEON-NOSVE-NEXT: ldrb w0, [sp, #5] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #4] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #3] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #2] +; NONEON-NOSVE-NEXT: strb w1, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w1, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #9] +; NONEON-NOSVE-NEXT: madd w1, w4, w3, w1 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #1] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w1, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w1, [sp, #21] +; NONEON-NOSVE-NEXT: madd w18, w0, w18, w1 +; NONEON-NOSVE-NEXT: strb w18, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w18, [sp, #20] +; NONEON-NOSVE-NEXT: madd w16, w17, w16, w18 +; NONEON-NOSVE-NEXT: strb w16, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #19] +; NONEON-NOSVE-NEXT: madd w14, w15, w14, w16 +; NONEON-NOSVE-NEXT: strb w14, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #18] +; NONEON-NOSVE-NEXT: madd w12, w13, w12, w14 +; NONEON-NOSVE-NEXT: strb w12, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #17] +; NONEON-NOSVE-NEXT: madd w10, w11, w10, w12 +; NONEON-NOSVE-NEXT: strb w10, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #16] +; NONEON-NOSVE-NEXT: madd w8, w9, w8, w10 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %tmp1 = mul <8 x i8> %A, %B; %tmp2 = add <8 x i8> %C, %tmp1; ret <8 x i8> %tmp2 diff --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll index 8917f43002daf..6198926c0b438 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll @@ -2,6 +2,7 @@ ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE ; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE ; This test only tests the legal types for a given vector width, as mulh nodes ; do not get generated for non-legal types. @@ -36,6 +37,35 @@ define <4 x i8> @smulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; SVE2-NEXT: lsr z0.h, z0.h, #4 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrsb w12, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #10] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsb w13, [sp, #20] +; NONEON-NOSVE-NEXT: ldrsb w14, [sp, #18] +; NONEON-NOSVE-NEXT: mul w8, w8, w12 +; NONEON-NOSVE-NEXT: ldrsb w12, [sp, #16] +; NONEON-NOSVE-NEXT: mul w9, w9, w13 +; NONEON-NOSVE-NEXT: mul w10, w10, w14 +; NONEON-NOSVE-NEXT: mul w11, w11, w12 +; NONEON-NOSVE-NEXT: ubfx w8, w8, #4, #12 +; NONEON-NOSVE-NEXT: ubfx w9, w9, #4, #12 +; NONEON-NOSVE-NEXT: ubfx w10, w10, #4, #12 +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ubfx w8, w11, #4, #12 +; NONEON-NOSVE-NEXT: strh w9, [sp, #28] +; 
NONEON-NOSVE-NEXT: strh w10, [sp, #26] +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x i16> undef, i16 4, i64 0 %splat = shufflevector <4 x i16> %insert, <4 x i16> undef, <4 x i32> zeroinitializer %1 = sext <4 x i8> %op1 to <4 x i16> @@ -63,6 +93,55 @@ define <8 x i8> @smulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; SVE2-NEXT: smulh z0.b, z0.b, z1.b ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsb w15, [sp, #15] +; NONEON-NOSVE-NEXT: ldrsb w16, [sp, #23] +; NONEON-NOSVE-NEXT: ldrsb w12, [sp, #12] +; NONEON-NOSVE-NEXT: ldrsb w13, [sp, #13] +; NONEON-NOSVE-NEXT: ldrsb w14, [sp, #14] +; NONEON-NOSVE-NEXT: ldrsb w17, [sp, #22] +; NONEON-NOSVE-NEXT: mul w15, w15, w16 +; NONEON-NOSVE-NEXT: ldrsb w16, [sp, #21] +; NONEON-NOSVE-NEXT: ldrsb w18, [sp, #20] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #8] +; NONEON-NOSVE-NEXT: mul w14, w14, w17 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #10] +; NONEON-NOSVE-NEXT: mul w13, w13, w16 +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #11] +; NONEON-NOSVE-NEXT: ldrsb w17, [sp, #16] +; NONEON-NOSVE-NEXT: mul w12, w12, w18 +; NONEON-NOSVE-NEXT: lsr w15, w15, #8 +; NONEON-NOSVE-NEXT: ldrsb w0, [sp, #19] +; NONEON-NOSVE-NEXT: ldrsb w16, [sp, #18] +; NONEON-NOSVE-NEXT: lsr w14, w14, #8 +; NONEON-NOSVE-NEXT: ldrsb w18, [sp, #17] +; NONEON-NOSVE-NEXT: mul w8, w8, w17 +; NONEON-NOSVE-NEXT: lsr w13, w13, #8 +; NONEON-NOSVE-NEXT: mul w11, w11, w0 +; NONEON-NOSVE-NEXT: lsr w12, w12, #8 +; NONEON-NOSVE-NEXT: strb w15, [sp, #31] +; NONEON-NOSVE-NEXT: mul w10, w10, w16 +; NONEON-NOSVE-NEXT: strb w14, [sp, #30] +; NONEON-NOSVE-NEXT: mul w9, w9, w18 +; NONEON-NOSVE-NEXT: lsr 
w8, w8, #8 +; NONEON-NOSVE-NEXT: strb w13, [sp, #29] +; NONEON-NOSVE-NEXT: lsr w11, w11, #8 +; NONEON-NOSVE-NEXT: strb w12, [sp, #28] +; NONEON-NOSVE-NEXT: lsr w10, w10, #8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsr w9, w9, #8 +; NONEON-NOSVE-NEXT: strb w11, [sp, #27] +; NONEON-NOSVE-NEXT: strb w10, [sp, #26] +; NONEON-NOSVE-NEXT: strb w9, [sp, #25] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <8 x i16> undef, i16 8, i64 0 %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer %1 = sext <8 x i8> %op1 to <8 x i16> @@ -90,6 +169,120 @@ define <16 x i8> @smulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; SVE2-NEXT: smulh z0.b, z0.b, z1.b ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #160 +; NONEON-NOSVE-NEXT: str x27, [sp, #80] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #96] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #112] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #128] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #144] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -80 +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldp d2, d0, [sp] +; NONEON-NOSVE-NEXT: str q1, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #40] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsb w6, [sp, #44] +; NONEON-NOSVE-NEXT: 
ldrsb w7, [sp, #45] +; NONEON-NOSVE-NEXT: ldrsb w19, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsb w20, [sp, #47] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #49] +; NONEON-NOSVE-NEXT: str d0, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #50] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #51] +; NONEON-NOSVE-NEXT: ldrsb w21, [sp, #63] +; NONEON-NOSVE-NEXT: ldrsb w23, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsb w25, [sp, #61] +; NONEON-NOSVE-NEXT: ldrsb w26, [sp, #60] +; NONEON-NOSVE-NEXT: str d1, [sp, #88] +; NONEON-NOSVE-NEXT: ldrsb w12, [sp, #52] +; NONEON-NOSVE-NEXT: mul w20, w20, w21 +; NONEON-NOSVE-NEXT: ldrsb w13, [sp, #53] +; NONEON-NOSVE-NEXT: ldrsb w15, [sp, #54] +; NONEON-NOSVE-NEXT: mul w19, w19, w23 +; NONEON-NOSVE-NEXT: ldrsb w17, [sp, #55] +; NONEON-NOSVE-NEXT: ldrsb w0, [sp, #40] +; NONEON-NOSVE-NEXT: mul w7, w7, w25 +; NONEON-NOSVE-NEXT: ldrsb w2, [sp, #41] +; NONEON-NOSVE-NEXT: ldrsb w3, [sp, #42] +; NONEON-NOSVE-NEXT: mul w6, w6, w26 +; NONEON-NOSVE-NEXT: lsr w20, w20, #8 +; NONEON-NOSVE-NEXT: ldrsb w4, [sp, #43] +; NONEON-NOSVE-NEXT: ldrsb w14, [sp, #88] +; NONEON-NOSVE-NEXT: lsr w19, w19, #8 +; NONEON-NOSVE-NEXT: ldrsb w16, [sp, #89] +; NONEON-NOSVE-NEXT: ldrsb w18, [sp, #90] +; NONEON-NOSVE-NEXT: lsr w7, w7, #8 +; NONEON-NOSVE-NEXT: ldrsb w1, [sp, #91] +; NONEON-NOSVE-NEXT: ldrsb w5, [sp, #92] +; NONEON-NOSVE-NEXT: mul w9, w9, w16 +; NONEON-NOSVE-NEXT: lsr w6, w6, #8 +; NONEON-NOSVE-NEXT: ldrsb w22, [sp, #93] +; NONEON-NOSVE-NEXT: ldrsb w24, [sp, #94] +; NONEON-NOSVE-NEXT: mul w11, w11, w1 +; NONEON-NOSVE-NEXT: ldrsb w21, [sp, #95] +; NONEON-NOSVE-NEXT: ldrsb w23, [sp, #56] +; NONEON-NOSVE-NEXT: mul w12, w12, w5 +; NONEON-NOSVE-NEXT: ldrsb w27, [sp, #59] +; NONEON-NOSVE-NEXT: ldrsb w25, [sp, #58] +; NONEON-NOSVE-NEXT: mul w15, w15, w24 +; NONEON-NOSVE-NEXT: ldrsb w26, [sp, #57] +; NONEON-NOSVE-NEXT: mul w0, w0, w23 +; NONEON-NOSVE-NEXT: lsr w11, w11, #8 +; NONEON-NOSVE-NEXT: mul w4, w4, w27 +; NONEON-NOSVE-NEXT: lsr w12, w12, #8 
+; NONEON-NOSVE-NEXT: lsr w9, w9, #8 +; NONEON-NOSVE-NEXT: mul w3, w3, w25 +; NONEON-NOSVE-NEXT: lsr w15, w15, #8 +; NONEON-NOSVE-NEXT: strb w20, [sp, #79] +; NONEON-NOSVE-NEXT: mul w2, w2, w26 +; NONEON-NOSVE-NEXT: lsr w0, w0, #8 +; NONEON-NOSVE-NEXT: strb w19, [sp, #78] +; NONEON-NOSVE-NEXT: mul w17, w17, w21 +; NONEON-NOSVE-NEXT: lsr w4, w4, #8 +; NONEON-NOSVE-NEXT: strb w7, [sp, #77] +; NONEON-NOSVE-NEXT: mul w13, w13, w22 +; NONEON-NOSVE-NEXT: lsr w3, w3, #8 +; NONEON-NOSVE-NEXT: strb w6, [sp, #76] +; NONEON-NOSVE-NEXT: mul w10, w10, w18 +; NONEON-NOSVE-NEXT: lsr w2, w2, #8 +; NONEON-NOSVE-NEXT: strb w4, [sp, #75] +; NONEON-NOSVE-NEXT: mul w8, w8, w14 +; NONEON-NOSVE-NEXT: lsr w17, w17, #8 +; NONEON-NOSVE-NEXT: strb w3, [sp, #74] +; NONEON-NOSVE-NEXT: lsr w13, w13, #8 +; NONEON-NOSVE-NEXT: strb w2, [sp, #73] +; NONEON-NOSVE-NEXT: ldr x27, [sp, #80] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: lsr w10, w10, #8 +; NONEON-NOSVE-NEXT: strb w0, [sp, #72] +; NONEON-NOSVE-NEXT: lsr w8, w8, #8 +; NONEON-NOSVE-NEXT: strb w17, [sp, #71] +; NONEON-NOSVE-NEXT: strb w15, [sp, #70] +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #144] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w13, [sp, #69] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #128] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w12, [sp, #68] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #112] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w11, [sp, #67] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #96] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w10, [sp, #66] +; NONEON-NOSVE-NEXT: strb w9, [sp, #65] +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #64] +; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: ret %1 = sext <16 x i8> %op1 to <16 x i16> %2 = sext <16 x i8> %op2 to <16 x i16> %mul = mul <16 x i16> %1, %2 @@ -118,6 +311,255 @@ define void @smulh_v32i8(ptr %a, ptr %b) { ; SVE2-NEXT: smulh z1.b, z2.b, z3.b ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; 
NONEON-NOSVE-LABEL: smulh_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #384 +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #288] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #304] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #320] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #336] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #352] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #368] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 384 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 +; NONEON-NOSVE-NEXT: .cfi_offset w28, -80 +; NONEON-NOSVE-NEXT: .cfi_offset w30, -88 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -96 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: mov x29, x0 +; NONEON-NOSVE-NEXT: ldp q3, q2, [x1] +; NONEON-NOSVE-NEXT: str q0, [sp, #128] +; NONEON-NOSVE-NEXT: str q1, [sp, #160] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #128] +; NONEON-NOSVE-NEXT: str q3, [sp, #144] +; NONEON-NOSVE-NEXT: str q2, [sp, #192] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #176] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #160] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #184] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #185] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #186] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #187] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #224] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #144] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #188] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #189] +; 
NONEON-NOSVE-NEXT: ldrsb w13, [sp, #229] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #227] +; NONEON-NOSVE-NEXT: ldrsb w12, [sp, #228] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #190] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #191] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #192] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #176] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #177] +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #226] +; NONEON-NOSVE-NEXT: ldrsb w2, [sp, #214] +; NONEON-NOSVE-NEXT: ldrsb w1, [sp, #215] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #178] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #179] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #240] +; NONEON-NOSVE-NEXT: ldrsb w4, [sp, #212] +; NONEON-NOSVE-NEXT: ldrsb w3, [sp, #213] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #180] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #181] +; NONEON-NOSVE-NEXT: ldrsb w14, [sp, #247] +; NONEON-NOSVE-NEXT: ldrsb w15, [sp, #246] +; NONEON-NOSVE-NEXT: ldrsb w16, [sp, #244] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #182] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #183] +; NONEON-NOSVE-NEXT: mul w26, w12, w16 +; NONEON-NOSVE-NEXT: ldrsb w12, [sp, #242] +; NONEON-NOSVE-NEXT: ldrsb w16, [sp, #250] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #232] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #233] +; NONEON-NOSVE-NEXT: mul w30, w10, w12 +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #255] +; NONEON-NOSVE-NEXT: ldrsb w12, [sp, #253] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #234] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #235] +; NONEON-NOSVE-NEXT: ldrsb w0, [sp, #248] +; 
NONEON-NOSVE-NEXT: ldrsb w18, [sp, #249] +; NONEON-NOSVE-NEXT: ldrsb w6, [sp, #210] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #236] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #237] +; NONEON-NOSVE-NEXT: ldrsb w5, [sp, #211] +; NONEON-NOSVE-NEXT: ldrsb w19, [sp, #208] +; NONEON-NOSVE-NEXT: ldrsb w7, [sp, #209] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #238] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #239] +; NONEON-NOSVE-NEXT: ldrsb w21, [sp, #222] +; NONEON-NOSVE-NEXT: ldrsb w20, [sp, #223] +; NONEON-NOSVE-NEXT: ldrsb w23, [sp, #220] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #224] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #225] +; NONEON-NOSVE-NEXT: ldrsb w22, [sp, #221] +; NONEON-NOSVE-NEXT: ldrsb w24, [sp, #219] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #230] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #231] +; NONEON-NOSVE-NEXT: mul w27, w8, w14 +; NONEON-NOSVE-NEXT: ldrsb w14, [sp, #245] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #217] +; NONEON-NOSVE-NEXT: mul w9, w9, w15 +; NONEON-NOSVE-NEXT: ldrsb w15, [sp, #251] +; NONEON-NOSVE-NEXT: mul w25, w13, w14 +; NONEON-NOSVE-NEXT: ldrsb w13, [sp, #243] +; NONEON-NOSVE-NEXT: lsr w14, w27, #8 +; NONEON-NOSVE-NEXT: ldrsb w27, [sp, #218] +; NONEON-NOSVE-NEXT: lsr w17, w9, #8 +; NONEON-NOSVE-NEXT: mul w28, w11, w13 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #216] +; NONEON-NOSVE-NEXT: strb w14, [sp, #287] +; NONEON-NOSVE-NEXT: lsr w14, w25, #8 +; NONEON-NOSVE-NEXT: ldr w25, [sp, #24] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #241] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #240] +; NONEON-NOSVE-NEXT: strb w14, [sp, #285] +; NONEON-NOSVE-NEXT: lsr w14, w28, #8 +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #254] +; NONEON-NOSVE-NEXT: mul w8, 
w25, w8 +; NONEON-NOSVE-NEXT: ldr w25, [sp, #28] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldrsb w13, [sp, #252] +; NONEON-NOSVE-NEXT: strb w14, [sp, #283] +; NONEON-NOSVE-NEXT: ldr w14, [sp, #40] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mul w9, w25, w9 +; NONEON-NOSVE-NEXT: ldr w25, [sp, #32] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w17, [sp, #286] +; NONEON-NOSVE-NEXT: mul w12, w14, w12 +; NONEON-NOSVE-NEXT: lsr w8, w8, #8 +; NONEON-NOSVE-NEXT: lsr w17, w26, #8 +; NONEON-NOSVE-NEXT: mul w10, w25, w10 +; NONEON-NOSVE-NEXT: ldr w25, [sp, #36] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w14, [sp, #44] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: lsr w9, w9, #8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #281] +; NONEON-NOSVE-NEXT: mul w11, w25, w11 +; NONEON-NOSVE-NEXT: strb w17, [sp, #284] +; NONEON-NOSVE-NEXT: lsr w17, w30, #8 +; NONEON-NOSVE-NEXT: mul w13, w14, w13 +; NONEON-NOSVE-NEXT: lsr w8, w10, #8 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #48] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #280] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #320] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: lsr w9, w11, #8 +; NONEON-NOSVE-NEXT: mul w10, w10, w15 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #52] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #279] +; NONEON-NOSVE-NEXT: lsr w8, w12, #8 +; NONEON-NOSVE-NEXT: ldr w12, [sp, #56] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mul w11, w11, w16 +; NONEON-NOSVE-NEXT: strb w9, [sp, #278] +; NONEON-NOSVE-NEXT: lsr w9, w13, #8 +; NONEON-NOSVE-NEXT: mul w12, w12, w18 +; NONEON-NOSVE-NEXT: ldr w13, [sp, #60] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #277] +; NONEON-NOSVE-NEXT: lsr w8, w10, #8 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #64] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #276] +; NONEON-NOSVE-NEXT: mul w13, w13, w0 +; NONEON-NOSVE-NEXT: lsr w9, w11, #8 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #68] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mul w10, w10, w1 +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #275] +; NONEON-NOSVE-NEXT: lsr w8, w12, #8 +; NONEON-NOSVE-NEXT: mul w11, w11, w2 +; NONEON-NOSVE-NEXT: ldr w12, [sp, #72] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #274] +; NONEON-NOSVE-NEXT: lsr w9, w13, #8 +; NONEON-NOSVE-NEXT: ldr w13, [sp, #76] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #273] +; NONEON-NOSVE-NEXT: mul w12, w12, w3 +; NONEON-NOSVE-NEXT: lsr w8, w10, #8 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #80] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mul w13, w13, w4 +; NONEON-NOSVE-NEXT: strb w9, [sp, #272] +; NONEON-NOSVE-NEXT: lsr w9, w11, #8 +; NONEON-NOSVE-NEXT: mul w10, w10, w5 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #84] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #271] +; NONEON-NOSVE-NEXT: lsr w8, w12, #8 +; NONEON-NOSVE-NEXT: ldr w12, [sp, #88] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #270] +; NONEON-NOSVE-NEXT: mul w11, w11, w6 +; NONEON-NOSVE-NEXT: lsr w9, w13, #8 +; NONEON-NOSVE-NEXT: ldr w13, [sp, #92] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mul w12, w12, w7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #269] +; NONEON-NOSVE-NEXT: lsr w8, w10, #8 +; NONEON-NOSVE-NEXT: mul w13, w13, w19 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #96] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #268] +; NONEON-NOSVE-NEXT: lsr w9, w11, #8 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #100] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #267] +; NONEON-NOSVE-NEXT: mul w10, w10, w20 +; NONEON-NOSVE-NEXT: lsr w8, w12, #8 +; NONEON-NOSVE-NEXT: ldr w12, [sp, #104] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mul w11, w11, w21 +; NONEON-NOSVE-NEXT: strb w9, [sp, #266] +; NONEON-NOSVE-NEXT: lsr w9, w13, #8 +; NONEON-NOSVE-NEXT: ldr w13, [sp, #108] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mul w12, w12, w22 +; NONEON-NOSVE-NEXT: strb w8, [sp, #265] +; NONEON-NOSVE-NEXT: lsr w8, w10, #8 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #112] // 4-byte Folded Reload +; 
NONEON-NOSVE-NEXT: strb w9, [sp, #264] +; NONEON-NOSVE-NEXT: mul w13, w13, w23 +; NONEON-NOSVE-NEXT: lsr w9, w11, #8 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #116] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp w15, w14, [sp, #16] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: mul w10, w10, w24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #263] +; NONEON-NOSVE-NEXT: lsr w8, w12, #8 +; NONEON-NOSVE-NEXT: mul w11, w11, w27 +; NONEON-NOSVE-NEXT: ldr w12, [sp, #120] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #262] +; NONEON-NOSVE-NEXT: lsr w9, w13, #8 +; NONEON-NOSVE-NEXT: ldr w13, [sp, #124] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #261] +; NONEON-NOSVE-NEXT: mul w12, w12, w15 +; NONEON-NOSVE-NEXT: lsr w8, w10, #8 +; NONEON-NOSVE-NEXT: strb w17, [sp, #282] +; NONEON-NOSVE-NEXT: mul w13, w13, w14 +; NONEON-NOSVE-NEXT: strb w9, [sp, #260] +; NONEON-NOSVE-NEXT: lsr w9, w11, #8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #259] +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #368] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: lsr w8, w12, #8 +; NONEON-NOSVE-NEXT: strb w9, [sp, #258] +; NONEON-NOSVE-NEXT: lsr w9, w13, #8 +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #352] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #257] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #336] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #256] +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #304] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #256] +; NONEON-NOSVE-NEXT: stp q0, q1, [x29] +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: add sp, sp, #384 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %1 = sext <32 x i8> %op1 to <32 x i16> @@ -153,6 +595,24 @@ define <2 x i16> @smulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; SVE2-NEXT: lsr z0.s, z0.s, #16 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v2i16: +; 
NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #20] +; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #16] +; NONEON-NOSVE-NEXT: mul w8, w8, w10 +; NONEON-NOSVE-NEXT: mul w9, w9, w11 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: lsr w9, w9, #16 +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %1 = sext <2 x i16> %op1 to <2 x i32> %2 = sext <2 x i16> %op2 to <2 x i32> %mul = mul <2 x i32> %1, %2 @@ -178,6 +638,35 @@ define <4 x i16> @smulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; SVE2-NEXT: smulh z0.h, z0.h, z1.h ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #14] +; NONEON-NOSVE-NEXT: ldrsh w12, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #12] +; NONEON-NOSVE-NEXT: ldrsh w13, [sp, #20] +; NONEON-NOSVE-NEXT: ldrsh w14, [sp, #18] +; NONEON-NOSVE-NEXT: mul w11, w11, w12 +; NONEON-NOSVE-NEXT: ldrsh w12, [sp, #16] +; NONEON-NOSVE-NEXT: mul w10, w10, w13 +; NONEON-NOSVE-NEXT: mul w9, w9, w14 +; NONEON-NOSVE-NEXT: mul w8, w8, w12 +; NONEON-NOSVE-NEXT: lsr w11, w11, #16 +; NONEON-NOSVE-NEXT: lsr w10, w10, #16 +; NONEON-NOSVE-NEXT: lsr w9, w9, #16 +; NONEON-NOSVE-NEXT: strh w11, [sp, #30] +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w10, [sp, #28] +; NONEON-NOSVE-NEXT: strh w9, [sp, #26] +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, 
#32 +; NONEON-NOSVE-NEXT: ret %1 = sext <4 x i16> %op1 to <4 x i32> %2 = sext <4 x i16> %op2 to <4 x i32> %mul = mul <4 x i32> %1, %2 @@ -203,6 +692,58 @@ define <8 x i16> @smulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; SVE2-NEXT: smulh z0.h, z0.h, z1.h ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-80]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsh w15, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsh w12, [sp, #32] +; NONEON-NOSVE-NEXT: ldrsh w13, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsh w14, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #42] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsh w16, [sp, #54] +; NONEON-NOSVE-NEXT: ldrsh w17, [sp, #52] +; NONEON-NOSVE-NEXT: ldrsh w18, [sp, #50] +; NONEON-NOSVE-NEXT: ldrsh w0, [sp, #62] +; NONEON-NOSVE-NEXT: mul w15, w15, w16 +; NONEON-NOSVE-NEXT: ldrsh w16, [sp, #48] +; NONEON-NOSVE-NEXT: mul w14, w14, w17 +; NONEON-NOSVE-NEXT: ldrsh w17, [sp, #56] +; NONEON-NOSVE-NEXT: mul w13, w13, w18 +; NONEON-NOSVE-NEXT: ldrsh w18, [sp, #60] +; NONEON-NOSVE-NEXT: mul w12, w12, w16 +; NONEON-NOSVE-NEXT: ldrsh w16, [sp, #58] +; NONEON-NOSVE-NEXT: lsr w15, w15, #16 +; NONEON-NOSVE-NEXT: mul w11, w11, w0 +; NONEON-NOSVE-NEXT: lsr w14, w14, #16 +; NONEON-NOSVE-NEXT: mul w10, w10, w18 +; NONEON-NOSVE-NEXT: lsr w13, w13, #16 +; NONEON-NOSVE-NEXT: strh w15, [sp, #78] +; NONEON-NOSVE-NEXT: mul w9, w9, w16 +; NONEON-NOSVE-NEXT: lsr w12, w12, #16 +; NONEON-NOSVE-NEXT: strh w14, [sp, #76] +; NONEON-NOSVE-NEXT: mul w8, w8, w17 +; NONEON-NOSVE-NEXT: lsr w11, w11, #16 +; NONEON-NOSVE-NEXT: strh w13, [sp, #74] +; NONEON-NOSVE-NEXT: 
lsr w10, w10, #16 +; NONEON-NOSVE-NEXT: strh w12, [sp, #72] +; NONEON-NOSVE-NEXT: lsr w9, w9, #16 +; NONEON-NOSVE-NEXT: strh w11, [sp, #70] +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w10, [sp, #68] +; NONEON-NOSVE-NEXT: strh w9, [sp, #66] +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #64] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %1 = sext <8 x i16> %op1 to <8 x i32> %2 = sext <8 x i16> %op2 to <8 x i32> %mul = mul <8 x i32> %1, %2 @@ -231,6 +772,129 @@ define void @smulh_v16i16(ptr %a, ptr %b) { ; SVE2-NEXT: smulh z1.h, z2.h, z3.h ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #240 +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #160] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #176] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #192] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #208] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #224] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 240 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 +; NONEON-NOSVE-NEXT: .cfi_offset w28, -80 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x1] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: str q1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: str q3, [sp, #16] +; NONEON-NOSVE-NEXT: str q2, [sp, #64] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #56] +; 
NONEON-NOSVE-NEXT: ldrsh w9, [sp, #58] +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #60] +; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsh w12, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsh w13, [sp, #50] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsh w7, [sp, #96] +; NONEON-NOSVE-NEXT: ldrsh w19, [sp, #98] +; NONEON-NOSVE-NEXT: ldrsh w20, [sp, #100] +; NONEON-NOSVE-NEXT: ldrsh w21, [sp, #102] +; NONEON-NOSVE-NEXT: ldrsh w14, [sp, #52] +; NONEON-NOSVE-NEXT: ldrsh w16, [sp, #54] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: ldrsh w18, [sp, #104] +; NONEON-NOSVE-NEXT: ldrsh w2, [sp, #106] +; NONEON-NOSVE-NEXT: ldrsh w4, [sp, #108] +; NONEON-NOSVE-NEXT: ldrsh w5, [sp, #110] +; NONEON-NOSVE-NEXT: ldrsh w15, [sp, #88] +; NONEON-NOSVE-NEXT: ldrsh w17, [sp, #90] +; NONEON-NOSVE-NEXT: ldrsh w1, [sp, #92] +; NONEON-NOSVE-NEXT: ldrsh w3, [sp, #94] +; NONEON-NOSVE-NEXT: mul w8, w8, w15 +; NONEON-NOSVE-NEXT: ldrsh w6, [sp, #80] +; NONEON-NOSVE-NEXT: ldrsh w23, [sp, #82] +; NONEON-NOSVE-NEXT: mul w11, w11, w3 +; NONEON-NOSVE-NEXT: ldrsh w25, [sp, #84] +; NONEON-NOSVE-NEXT: mul w13, w13, w23 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #64] +; NONEON-NOSVE-NEXT: mul w14, w14, w25 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: mul w12, w12, w6 +; NONEON-NOSVE-NEXT: lsr w11, w11, #16 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] +; NONEON-NOSVE-NEXT: mul w10, w10, w1 +; NONEON-NOSVE-NEXT: lsr w13, w13, #16 +; NONEON-NOSVE-NEXT: ldrsh w22, [sp, #118] +; NONEON-NOSVE-NEXT: ldrsh w24, [sp, #116] +; NONEON-NOSVE-NEXT: ldrsh w26, [sp, #114] +; NONEON-NOSVE-NEXT: ldrsh w27, [sp, #112] +; NONEON-NOSVE-NEXT: ldrsh w28, [sp, #126] +; NONEON-NOSVE-NEXT: mul w9, w9, w17 +; NONEON-NOSVE-NEXT: mul w21, w21, w22 +; NONEON-NOSVE-NEXT: ldrsh w22, [sp, #86] +; NONEON-NOSVE-NEXT: lsr w14, w14, #16 +; NONEON-NOSVE-NEXT: mul w20, w20, w24 +; NONEON-NOSVE-NEXT: ldrsh w24, [sp, #120] +; 
NONEON-NOSVE-NEXT: lsr w12, w12, #16 +; NONEON-NOSVE-NEXT: mul w19, w19, w26 +; NONEON-NOSVE-NEXT: ldrsh w26, [sp, #124] +; NONEON-NOSVE-NEXT: lsr w10, w10, #16 +; NONEON-NOSVE-NEXT: mul w7, w7, w27 +; NONEON-NOSVE-NEXT: ldrsh w27, [sp, #122] +; NONEON-NOSVE-NEXT: lsr w21, w21, #16 +; NONEON-NOSVE-NEXT: mul w5, w5, w28 +; NONEON-NOSVE-NEXT: lsr w20, w20, #16 +; NONEON-NOSVE-NEXT: lsr w9, w9, #16 +; NONEON-NOSVE-NEXT: mul w4, w4, w26 +; NONEON-NOSVE-NEXT: lsr w19, w19, #16 +; NONEON-NOSVE-NEXT: strh w21, [sp, #158] +; NONEON-NOSVE-NEXT: mul w2, w2, w27 +; NONEON-NOSVE-NEXT: lsr w7, w7, #16 +; NONEON-NOSVE-NEXT: strh w20, [sp, #156] +; NONEON-NOSVE-NEXT: mul w18, w18, w24 +; NONEON-NOSVE-NEXT: lsr w5, w5, #16 +; NONEON-NOSVE-NEXT: strh w19, [sp, #154] +; NONEON-NOSVE-NEXT: mul w16, w16, w22 +; NONEON-NOSVE-NEXT: lsr w4, w4, #16 +; NONEON-NOSVE-NEXT: strh w7, [sp, #152] +; NONEON-NOSVE-NEXT: lsr w2, w2, #16 +; NONEON-NOSVE-NEXT: strh w5, [sp, #150] +; NONEON-NOSVE-NEXT: lsr w18, w18, #16 +; NONEON-NOSVE-NEXT: strh w4, [sp, #148] +; NONEON-NOSVE-NEXT: lsr w16, w16, #16 +; NONEON-NOSVE-NEXT: strh w2, [sp, #146] +; NONEON-NOSVE-NEXT: strh w18, [sp, #144] +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #224] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w16, [sp, #142] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #208] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w14, [sp, #140] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #192] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w13, [sp, #138] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #176] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w12, [sp, #136] +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #160] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w11, [sp, #134] +; NONEON-NOSVE-NEXT: strh w10, [sp, #132] +; NONEON-NOSVE-NEXT: strh w9, [sp, #130] +; NONEON-NOSVE-NEXT: strh w8, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #240 +; 
NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %1 = sext <16 x i16> %op1 to <16 x i32> @@ -259,6 +923,22 @@ define <2 x i32> @smulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; SVE2-NEXT: smulh z0.s, z0.s, z1.s ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #8] +; NONEON-NOSVE-NEXT: ldpsw x11, x10, [sp, #16] +; NONEON-NOSVE-NEXT: smull x9, w9, w10 +; NONEON-NOSVE-NEXT: smull x8, w8, w11 +; NONEON-NOSVE-NEXT: lsr x9, x9, #32 +; NONEON-NOSVE-NEXT: lsr x8, x8, #32 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %1 = sext <2 x i32> %op1 to <2 x i64> %2 = sext <2 x i32> %op2 to <2 x i64> %mul = mul <2 x i64> %1, %2 @@ -284,6 +964,32 @@ define <4 x i32> @smulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; SVE2-NEXT: smulh z0.s, z0.s, z1.s ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-80]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #40] +; NONEON-NOSVE-NEXT: ldpsw x10, x11, [sp, #32] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldpsw x13, x12, [sp, #48] +; NONEON-NOSVE-NEXT: smull x11, w11, w12 +; NONEON-NOSVE-NEXT: ldpsw x12, x14, [sp, #56] +; NONEON-NOSVE-NEXT: smull x10, w10, w13 +; NONEON-NOSVE-NEXT: lsr x11, x11, #32 +; NONEON-NOSVE-NEXT: smull x9, w9, w14 +; NONEON-NOSVE-NEXT: smull x8, w8, w12 +; NONEON-NOSVE-NEXT: lsr x10, x10, #32 +; NONEON-NOSVE-NEXT: lsr x9, x9, #32 +; NONEON-NOSVE-NEXT: stp w10, w11, [sp, #72] +; NONEON-NOSVE-NEXT: lsr x8, x8, #32 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #64] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %1 = sext <4 x i32> %op1 to <4 x i64> %2 = sext <4 x i32> %op2 to <4 x i64> %mul = mul <4 x i64> %1, %2 @@ -312,6 +1018,56 @@ define void @smulh_v8i32(ptr %a, ptr %b) { ; SVE2-NEXT: smulh z1.s, z2.s, z3.s ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #160 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x1] +; NONEON-NOSVE-NEXT: str q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q3, [sp] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #32] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #56] +; NONEON-NOSVE-NEXT: ldpsw x10, x11, [sp, #48] +; NONEON-NOSVE-NEXT: ldpsw x12, x13, [sp, #104] +; NONEON-NOSVE-NEXT: ldpsw x14, x15, [sp, #96] +; NONEON-NOSVE-NEXT: str q2, [sp, #64] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: ldp d1, 
d0, [sp, #64] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] +; NONEON-NOSVE-NEXT: ldpsw x17, x16, [sp, #112] +; NONEON-NOSVE-NEXT: smull x15, w15, w16 +; NONEON-NOSVE-NEXT: ldpsw x16, x18, [sp, #120] +; NONEON-NOSVE-NEXT: smull x14, w14, w17 +; NONEON-NOSVE-NEXT: ldpsw x17, x1, [sp, #80] +; NONEON-NOSVE-NEXT: smull x13, w13, w18 +; NONEON-NOSVE-NEXT: lsr x15, x15, #32 +; NONEON-NOSVE-NEXT: smull x12, w12, w16 +; NONEON-NOSVE-NEXT: lsr x14, x14, #32 +; NONEON-NOSVE-NEXT: ldpsw x16, x18, [sp, #88] +; NONEON-NOSVE-NEXT: smull x11, w11, w1 +; NONEON-NOSVE-NEXT: lsr x13, x13, #32 +; NONEON-NOSVE-NEXT: stp w14, w15, [sp, #152] +; NONEON-NOSVE-NEXT: smull x10, w10, w17 +; NONEON-NOSVE-NEXT: lsr x12, x12, #32 +; NONEON-NOSVE-NEXT: smull x9, w9, w18 +; NONEON-NOSVE-NEXT: smull x8, w8, w16 +; NONEON-NOSVE-NEXT: lsr x11, x11, #32 +; NONEON-NOSVE-NEXT: stp w12, w13, [sp, #144] +; NONEON-NOSVE-NEXT: lsr x10, x10, #32 +; NONEON-NOSVE-NEXT: lsr x9, x9, #32 +; NONEON-NOSVE-NEXT: lsr x8, x8, #32 +; NONEON-NOSVE-NEXT: stp w10, w11, [sp, #136] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %1 = sext <8 x i32> %op1 to <8 x i64> @@ -340,6 +1096,18 @@ define <1 x i64> @smulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; SVE2-NEXT: smulh z0.d, z0.d, z1.d ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: fmov x9, d1 +; NONEON-NOSVE-NEXT: smulh x8, x8, x9 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <1 x i128> undef, i128 64, i128 0 %splat = 
shufflevector <1 x i128> %insert, <1 x i128> undef, <1 x i32> zeroinitializer %1 = sext <1 x i64> %op1 to <1 x i128> @@ -367,6 +1135,21 @@ define <2 x i64> @smulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; SVE2-NEXT: smulh z0.d, z0.d, z1.d ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp] +; NONEON-NOSVE-NEXT: ldp x11, x10, [sp, #16] +; NONEON-NOSVE-NEXT: smulh x8, x8, x10 +; NONEON-NOSVE-NEXT: smulh x9, x9, x11 +; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %1 = sext <2 x i64> %op1 to <2 x i128> %2 = sext <2 x i64> %op2 to <2 x i128> %mul = mul <2 x i128> %1, %2 @@ -395,6 +1178,33 @@ define void @smulh_v4i64(ptr %a, ptr %b) { ; SVE2-NEXT: smulh z1.d, z2.d, z3.d ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #128 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q2, [sp] +; NONEON-NOSVE-NEXT: ldp x11, x10, [sp] +; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x13, x12, [sp, #16] +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp, #32] +; NONEON-NOSVE-NEXT: smulh x10, x10, x12 +; NONEON-NOSVE-NEXT: ldp x14, x12, [sp, #48] +; NONEON-NOSVE-NEXT: smulh x11, x11, x13 +; NONEON-NOSVE-NEXT: smulh x8, x8, x12 +; NONEON-NOSVE-NEXT: smulh x9, x9, x14 +; NONEON-NOSVE-NEXT: stp x11, x10, [sp, #64] +; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #80] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64] +; 
NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %1 = sext <4 x i64> %op1 to <4 x i128> @@ -433,6 +1243,35 @@ define <4 x i8> @umulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; SVE2-NEXT: lsr z0.h, z0.h, #4 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #18] +; NONEON-NOSVE-NEXT: mul w8, w8, w12 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #16] +; NONEON-NOSVE-NEXT: mul w9, w9, w13 +; NONEON-NOSVE-NEXT: mul w10, w10, w14 +; NONEON-NOSVE-NEXT: mul w11, w11, w12 +; NONEON-NOSVE-NEXT: lsr w8, w8, #4 +; NONEON-NOSVE-NEXT: lsr w9, w9, #4 +; NONEON-NOSVE-NEXT: lsr w10, w10, #4 +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: lsr w8, w11, #4 +; NONEON-NOSVE-NEXT: strh w9, [sp, #28] +; NONEON-NOSVE-NEXT: strh w10, [sp, #26] +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %1 = zext <4 x i8> %op1 to <4 x i16> %2 = zext <4 x i8> %op2 to <4 x i16> %mul = mul <4 x i16> %1, %2 @@ -458,6 +1297,55 @@ define <8 x i8> @umulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; SVE2-NEXT: umulh z0.b, z0.b, z1.b ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: 
.cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #15] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #13] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #22] +; NONEON-NOSVE-NEXT: mul w15, w15, w16 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w18, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: mul w14, w14, w17 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #10] +; NONEON-NOSVE-NEXT: mul w13, w13, w16 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #16] +; NONEON-NOSVE-NEXT: mul w12, w12, w18 +; NONEON-NOSVE-NEXT: lsr w15, w15, #8 +; NONEON-NOSVE-NEXT: ldrb w0, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #18] +; NONEON-NOSVE-NEXT: lsr w14, w14, #8 +; NONEON-NOSVE-NEXT: ldrb w18, [sp, #17] +; NONEON-NOSVE-NEXT: mul w8, w8, w17 +; NONEON-NOSVE-NEXT: lsr w13, w13, #8 +; NONEON-NOSVE-NEXT: mul w11, w11, w0 +; NONEON-NOSVE-NEXT: lsr w12, w12, #8 +; NONEON-NOSVE-NEXT: strb w15, [sp, #31] +; NONEON-NOSVE-NEXT: mul w10, w10, w16 +; NONEON-NOSVE-NEXT: strb w14, [sp, #30] +; NONEON-NOSVE-NEXT: mul w9, w9, w18 +; NONEON-NOSVE-NEXT: lsr w8, w8, #8 +; NONEON-NOSVE-NEXT: strb w13, [sp, #29] +; NONEON-NOSVE-NEXT: lsr w11, w11, #8 +; NONEON-NOSVE-NEXT: strb w12, [sp, #28] +; NONEON-NOSVE-NEXT: lsr w10, w10, #8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsr w9, w9, #8 +; NONEON-NOSVE-NEXT: strb w11, [sp, #27] +; NONEON-NOSVE-NEXT: strb w10, [sp, #26] +; NONEON-NOSVE-NEXT: strb w9, [sp, #25] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %1 = zext <8 x i8> %op1 to <8 x i16> %2 = zext <8 x i8> %op2 to <8 x i16> %mul = mul <8 x i16> %1, %2 @@ -483,6 +1371,120 @@ define <16 x i8> @umulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; SVE2-NEXT: 
umulh z0.b, z0.b, z1.b ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #160 +; NONEON-NOSVE-NEXT: str x27, [sp, #80] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #96] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #112] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #128] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #144] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -80 +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldp d2, d0, [sp] +; NONEON-NOSVE-NEXT: str q1, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #40] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w6, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w7, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w19, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w20, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #49] +; NONEON-NOSVE-NEXT: str d0, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w21, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w23, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w25, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w26, [sp, #60] +; NONEON-NOSVE-NEXT: str d1, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #52] +; NONEON-NOSVE-NEXT: mul w20, w20, w21 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #54] +; NONEON-NOSVE-NEXT: mul w19, w19, w23 +; NONEON-NOSVE-NEXT: ldrb w17, 
[sp, #55] +; NONEON-NOSVE-NEXT: ldrb w0, [sp, #40] +; NONEON-NOSVE-NEXT: mul w7, w7, w25 +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w3, [sp, #42] +; NONEON-NOSVE-NEXT: mul w6, w6, w26 +; NONEON-NOSVE-NEXT: lsr w20, w20, #8 +; NONEON-NOSVE-NEXT: ldrb w4, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #88] +; NONEON-NOSVE-NEXT: lsr w19, w19, #8 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w18, [sp, #90] +; NONEON-NOSVE-NEXT: lsr w7, w7, #8 +; NONEON-NOSVE-NEXT: ldrb w1, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w5, [sp, #92] +; NONEON-NOSVE-NEXT: mul w9, w9, w16 +; NONEON-NOSVE-NEXT: lsr w6, w6, #8 +; NONEON-NOSVE-NEXT: ldrb w22, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w24, [sp, #94] +; NONEON-NOSVE-NEXT: mul w11, w11, w1 +; NONEON-NOSVE-NEXT: ldrb w21, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w23, [sp, #56] +; NONEON-NOSVE-NEXT: mul w12, w12, w5 +; NONEON-NOSVE-NEXT: ldrb w27, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w25, [sp, #58] +; NONEON-NOSVE-NEXT: mul w15, w15, w24 +; NONEON-NOSVE-NEXT: ldrb w26, [sp, #57] +; NONEON-NOSVE-NEXT: mul w0, w0, w23 +; NONEON-NOSVE-NEXT: lsr w11, w11, #8 +; NONEON-NOSVE-NEXT: mul w4, w4, w27 +; NONEON-NOSVE-NEXT: lsr w12, w12, #8 +; NONEON-NOSVE-NEXT: lsr w9, w9, #8 +; NONEON-NOSVE-NEXT: mul w3, w3, w25 +; NONEON-NOSVE-NEXT: lsr w15, w15, #8 +; NONEON-NOSVE-NEXT: strb w20, [sp, #79] +; NONEON-NOSVE-NEXT: mul w2, w2, w26 +; NONEON-NOSVE-NEXT: lsr w0, w0, #8 +; NONEON-NOSVE-NEXT: strb w19, [sp, #78] +; NONEON-NOSVE-NEXT: mul w17, w17, w21 +; NONEON-NOSVE-NEXT: lsr w4, w4, #8 +; NONEON-NOSVE-NEXT: strb w7, [sp, #77] +; NONEON-NOSVE-NEXT: mul w13, w13, w22 +; NONEON-NOSVE-NEXT: lsr w3, w3, #8 +; NONEON-NOSVE-NEXT: strb w6, [sp, #76] +; NONEON-NOSVE-NEXT: mul w10, w10, w18 +; NONEON-NOSVE-NEXT: lsr w2, w2, #8 +; NONEON-NOSVE-NEXT: strb w4, [sp, #75] +; NONEON-NOSVE-NEXT: mul w8, w8, w14 +; NONEON-NOSVE-NEXT: lsr w17, w17, #8 +; NONEON-NOSVE-NEXT: strb w3, [sp, #74] +; NONEON-NOSVE-NEXT: lsr w13, w13, #8 
+; NONEON-NOSVE-NEXT: strb w2, [sp, #73] +; NONEON-NOSVE-NEXT: ldr x27, [sp, #80] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: lsr w10, w10, #8 +; NONEON-NOSVE-NEXT: strb w0, [sp, #72] +; NONEON-NOSVE-NEXT: lsr w8, w8, #8 +; NONEON-NOSVE-NEXT: strb w17, [sp, #71] +; NONEON-NOSVE-NEXT: strb w15, [sp, #70] +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #144] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w13, [sp, #69] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #128] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w12, [sp, #68] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #112] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w11, [sp, #67] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #96] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w10, [sp, #66] +; NONEON-NOSVE-NEXT: strb w9, [sp, #65] +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #64] +; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: ret %1 = zext <16 x i8> %op1 to <16 x i16> %2 = zext <16 x i8> %op2 to <16 x i16> %mul = mul <16 x i16> %1, %2 @@ -511,6 +1513,255 @@ define void @umulh_v32i8(ptr %a, ptr %b) { ; SVE2-NEXT: umulh z1.b, z2.b, z3.b ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #384 +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #288] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #304] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #320] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #336] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #352] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #368] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 384 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; 
NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 +; NONEON-NOSVE-NEXT: .cfi_offset w28, -80 +; NONEON-NOSVE-NEXT: .cfi_offset w30, -88 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -96 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: mov x29, x0 +; NONEON-NOSVE-NEXT: ldp q3, q2, [x1] +; NONEON-NOSVE-NEXT: str q0, [sp, #128] +; NONEON-NOSVE-NEXT: str q1, [sp, #160] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #128] +; NONEON-NOSVE-NEXT: str q3, [sp, #144] +; NONEON-NOSVE-NEXT: str q2, [sp, #192] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #176] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #160] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #184] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #185] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #186] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #187] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #224] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #144] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #188] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #189] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #229] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #227] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #228] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #190] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #191] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #192] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #176] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #177] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #226] +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #214] +; NONEON-NOSVE-NEXT: ldrb w1, [sp, #215] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #178] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #179] +; 
NONEON-NOSVE-NEXT: stp d0, d1, [sp, #240] +; NONEON-NOSVE-NEXT: ldrb w4, [sp, #212] +; NONEON-NOSVE-NEXT: ldrb w3, [sp, #213] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #180] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #181] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #247] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #246] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #244] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #182] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #183] +; NONEON-NOSVE-NEXT: mul w26, w12, w16 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #242] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #250] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #232] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #233] +; NONEON-NOSVE-NEXT: mul w30, w10, w12 +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #255] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #253] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #234] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #235] +; NONEON-NOSVE-NEXT: ldrb w0, [sp, #248] +; NONEON-NOSVE-NEXT: ldrb w18, [sp, #249] +; NONEON-NOSVE-NEXT: ldrb w6, [sp, #210] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #236] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #237] +; NONEON-NOSVE-NEXT: ldrb w5, [sp, #211] +; NONEON-NOSVE-NEXT: ldrb w19, [sp, #208] +; NONEON-NOSVE-NEXT: ldrb w7, [sp, #209] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #238] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #239] +; NONEON-NOSVE-NEXT: ldrb w21, [sp, #222] +; NONEON-NOSVE-NEXT: ldrb w20, [sp, #223] +; NONEON-NOSVE-NEXT: ldrb w23, [sp, #220] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #224] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #225] +; NONEON-NOSVE-NEXT: ldrb w22, [sp, #221] +; 
NONEON-NOSVE-NEXT: ldrb w24, [sp, #219] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #230] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #231] +; NONEON-NOSVE-NEXT: mul w27, w8, w14 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #245] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #217] +; NONEON-NOSVE-NEXT: mul w9, w9, w15 +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #251] +; NONEON-NOSVE-NEXT: mul w25, w13, w14 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #243] +; NONEON-NOSVE-NEXT: lsr w14, w27, #8 +; NONEON-NOSVE-NEXT: ldrb w27, [sp, #218] +; NONEON-NOSVE-NEXT: lsr w17, w9, #8 +; NONEON-NOSVE-NEXT: mul w28, w11, w13 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #216] +; NONEON-NOSVE-NEXT: strb w14, [sp, #287] +; NONEON-NOSVE-NEXT: lsr w14, w25, #8 +; NONEON-NOSVE-NEXT: ldr w25, [sp, #24] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #241] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #240] +; NONEON-NOSVE-NEXT: strb w14, [sp, #285] +; NONEON-NOSVE-NEXT: lsr w14, w28, #8 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #254] +; NONEON-NOSVE-NEXT: mul w8, w25, w8 +; NONEON-NOSVE-NEXT: ldr w25, [sp, #28] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #252] +; NONEON-NOSVE-NEXT: strb w14, [sp, #283] +; NONEON-NOSVE-NEXT: ldr w14, [sp, #40] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mul w9, w25, w9 +; NONEON-NOSVE-NEXT: ldr w25, [sp, #32] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w17, [sp, #286] +; NONEON-NOSVE-NEXT: mul w12, w14, w12 +; NONEON-NOSVE-NEXT: lsr w8, w8, #8 +; NONEON-NOSVE-NEXT: lsr w17, w26, #8 +; NONEON-NOSVE-NEXT: mul w10, w25, w10 +; NONEON-NOSVE-NEXT: ldr w25, [sp, #36] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w14, [sp, #44] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: lsr w9, w9, #8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #281] +; NONEON-NOSVE-NEXT: mul w11, w25, w11 +; NONEON-NOSVE-NEXT: strb w17, [sp, #284] +; NONEON-NOSVE-NEXT: lsr w17, w30, #8 +; 
NONEON-NOSVE-NEXT: mul w13, w14, w13 +; NONEON-NOSVE-NEXT: lsr w8, w10, #8 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #48] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #280] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #320] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: lsr w9, w11, #8 +; NONEON-NOSVE-NEXT: mul w10, w10, w15 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #52] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #279] +; NONEON-NOSVE-NEXT: lsr w8, w12, #8 +; NONEON-NOSVE-NEXT: ldr w12, [sp, #56] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mul w11, w11, w16 +; NONEON-NOSVE-NEXT: strb w9, [sp, #278] +; NONEON-NOSVE-NEXT: lsr w9, w13, #8 +; NONEON-NOSVE-NEXT: mul w12, w12, w18 +; NONEON-NOSVE-NEXT: ldr w13, [sp, #60] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #277] +; NONEON-NOSVE-NEXT: lsr w8, w10, #8 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #64] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #276] +; NONEON-NOSVE-NEXT: mul w13, w13, w0 +; NONEON-NOSVE-NEXT: lsr w9, w11, #8 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #68] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mul w10, w10, w1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #275] +; NONEON-NOSVE-NEXT: lsr w8, w12, #8 +; NONEON-NOSVE-NEXT: mul w11, w11, w2 +; NONEON-NOSVE-NEXT: ldr w12, [sp, #72] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #274] +; NONEON-NOSVE-NEXT: lsr w9, w13, #8 +; NONEON-NOSVE-NEXT: ldr w13, [sp, #76] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #273] +; NONEON-NOSVE-NEXT: mul w12, w12, w3 +; NONEON-NOSVE-NEXT: lsr w8, w10, #8 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #80] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mul w13, w13, w4 +; NONEON-NOSVE-NEXT: strb w9, [sp, #272] +; NONEON-NOSVE-NEXT: lsr w9, w11, #8 +; NONEON-NOSVE-NEXT: mul w10, w10, w5 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #84] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #271] +; NONEON-NOSVE-NEXT: lsr w8, w12, #8 +; NONEON-NOSVE-NEXT: ldr w12, [sp, #88] // 
4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #270] +; NONEON-NOSVE-NEXT: mul w11, w11, w6 +; NONEON-NOSVE-NEXT: lsr w9, w13, #8 +; NONEON-NOSVE-NEXT: ldr w13, [sp, #92] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mul w12, w12, w7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #269] +; NONEON-NOSVE-NEXT: lsr w8, w10, #8 +; NONEON-NOSVE-NEXT: mul w13, w13, w19 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #96] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #268] +; NONEON-NOSVE-NEXT: lsr w9, w11, #8 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #100] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #267] +; NONEON-NOSVE-NEXT: mul w10, w10, w20 +; NONEON-NOSVE-NEXT: lsr w8, w12, #8 +; NONEON-NOSVE-NEXT: ldr w12, [sp, #104] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mul w11, w11, w21 +; NONEON-NOSVE-NEXT: strb w9, [sp, #266] +; NONEON-NOSVE-NEXT: lsr w9, w13, #8 +; NONEON-NOSVE-NEXT: ldr w13, [sp, #108] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mul w12, w12, w22 +; NONEON-NOSVE-NEXT: strb w8, [sp, #265] +; NONEON-NOSVE-NEXT: lsr w8, w10, #8 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #112] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #264] +; NONEON-NOSVE-NEXT: mul w13, w13, w23 +; NONEON-NOSVE-NEXT: lsr w9, w11, #8 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #116] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp w15, w14, [sp, #16] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: mul w10, w10, w24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #263] +; NONEON-NOSVE-NEXT: lsr w8, w12, #8 +; NONEON-NOSVE-NEXT: mul w11, w11, w27 +; NONEON-NOSVE-NEXT: ldr w12, [sp, #120] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #262] +; NONEON-NOSVE-NEXT: lsr w9, w13, #8 +; NONEON-NOSVE-NEXT: ldr w13, [sp, #124] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #261] +; NONEON-NOSVE-NEXT: mul w12, w12, w15 +; NONEON-NOSVE-NEXT: lsr w8, w10, #8 +; NONEON-NOSVE-NEXT: strb w17, [sp, #282] +; NONEON-NOSVE-NEXT: mul w13, w13, w14 +; NONEON-NOSVE-NEXT: strb w9, [sp, 
#260] +; NONEON-NOSVE-NEXT: lsr w9, w11, #8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #259] +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #368] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: lsr w8, w12, #8 +; NONEON-NOSVE-NEXT: strb w9, [sp, #258] +; NONEON-NOSVE-NEXT: lsr w9, w13, #8 +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #352] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #257] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #336] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #256] +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #304] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #256] +; NONEON-NOSVE-NEXT: stp q0, q1, [x29] +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: add sp, sp, #384 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %1 = zext <32 x i8> %op1 to <32 x i16> @@ -545,6 +1796,24 @@ define <2 x i16> @umulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; SVE2-NEXT: lsr z0.s, z0.s, #16 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #16] +; NONEON-NOSVE-NEXT: mul w8, w8, w10 +; NONEON-NOSVE-NEXT: mul w9, w9, w11 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: lsr w9, w9, #16 +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %1 = zext <2 x i16> %op1 to <2 x i32> %2 = zext <2 x i16> %op2 to <2 x i32> %mul = mul <2 x i32> %1, %2 @@ -570,6 +1839,35 @@ define <4 x i16> @umulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; SVE2-NEXT: umulh z0.h, z0.h, z1.h ; SVE2-NEXT: // kill: def $d0 
killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #18] +; NONEON-NOSVE-NEXT: mul w11, w11, w12 +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #16] +; NONEON-NOSVE-NEXT: mul w10, w10, w13 +; NONEON-NOSVE-NEXT: mul w9, w9, w14 +; NONEON-NOSVE-NEXT: mul w8, w8, w12 +; NONEON-NOSVE-NEXT: lsr w11, w11, #16 +; NONEON-NOSVE-NEXT: lsr w10, w10, #16 +; NONEON-NOSVE-NEXT: lsr w9, w9, #16 +; NONEON-NOSVE-NEXT: strh w11, [sp, #30] +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w10, [sp, #28] +; NONEON-NOSVE-NEXT: strh w9, [sp, #26] +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %1 = zext <4 x i16> %op1 to <4 x i32> %2 = zext <4 x i16> %op2 to <4 x i32> %mul = mul <4 x i32> %1, %2 @@ -595,6 +1893,58 @@ define <8 x i16> @umulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; SVE2-NEXT: umulh z0.h, z0.h, z1.h ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-80]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w18, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w0, [sp, #62] +; NONEON-NOSVE-NEXT: mul w15, w15, w16 +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #48] +; NONEON-NOSVE-NEXT: mul w14, w14, w17 +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #56] +; NONEON-NOSVE-NEXT: mul w13, w13, w18 +; NONEON-NOSVE-NEXT: ldrh w18, [sp, #60] +; NONEON-NOSVE-NEXT: mul w12, w12, w16 +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #58] +; NONEON-NOSVE-NEXT: lsr w15, w15, #16 +; NONEON-NOSVE-NEXT: mul w11, w11, w0 +; NONEON-NOSVE-NEXT: lsr w14, w14, #16 +; NONEON-NOSVE-NEXT: mul w10, w10, w18 +; NONEON-NOSVE-NEXT: lsr w13, w13, #16 +; NONEON-NOSVE-NEXT: strh w15, [sp, #78] +; NONEON-NOSVE-NEXT: mul w9, w9, w16 +; NONEON-NOSVE-NEXT: lsr w12, w12, #16 +; NONEON-NOSVE-NEXT: strh w14, [sp, #76] +; NONEON-NOSVE-NEXT: mul w8, w8, w17 +; NONEON-NOSVE-NEXT: lsr w11, w11, #16 +; NONEON-NOSVE-NEXT: strh w13, [sp, #74] +; NONEON-NOSVE-NEXT: lsr w10, w10, #16 +; NONEON-NOSVE-NEXT: strh w12, [sp, #72] +; NONEON-NOSVE-NEXT: lsr w9, w9, #16 +; NONEON-NOSVE-NEXT: strh w11, [sp, #70] +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w10, [sp, #68] +; NONEON-NOSVE-NEXT: strh w9, [sp, #66] +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #64] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %1 = zext <8 x i16> %op1 to <8 x i32> %2 = 
zext <8 x i16> %op2 to <8 x i32> %mul = mul <8 x i32> %1, %2 @@ -623,6 +1973,129 @@ define void @umulh_v16i16(ptr %a, ptr %b) { ; SVE2-NEXT: umulh z1.h, z2.h, z3.h ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #240 +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #160] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #176] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #192] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #208] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #224] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 240 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 +; NONEON-NOSVE-NEXT: .cfi_offset w28, -80 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x1] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: str q1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: str q3, [sp, #16] +; NONEON-NOSVE-NEXT: str q2, [sp, #64] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #50] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w7, [sp, #96] +; NONEON-NOSVE-NEXT: ldrh w19, [sp, #98] +; NONEON-NOSVE-NEXT: ldrh w20, [sp, #100] +; NONEON-NOSVE-NEXT: ldrh 
w21, [sp, #102] +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #54] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w18, [sp, #104] +; NONEON-NOSVE-NEXT: ldrh w2, [sp, #106] +; NONEON-NOSVE-NEXT: ldrh w4, [sp, #108] +; NONEON-NOSVE-NEXT: ldrh w5, [sp, #110] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w1, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w3, [sp, #94] +; NONEON-NOSVE-NEXT: mul w8, w8, w15 +; NONEON-NOSVE-NEXT: ldrh w6, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w23, [sp, #82] +; NONEON-NOSVE-NEXT: mul w11, w11, w3 +; NONEON-NOSVE-NEXT: ldrh w25, [sp, #84] +; NONEON-NOSVE-NEXT: mul w13, w13, w23 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #64] +; NONEON-NOSVE-NEXT: mul w14, w14, w25 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: mul w12, w12, w6 +; NONEON-NOSVE-NEXT: lsr w11, w11, #16 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] +; NONEON-NOSVE-NEXT: mul w10, w10, w1 +; NONEON-NOSVE-NEXT: lsr w13, w13, #16 +; NONEON-NOSVE-NEXT: ldrh w22, [sp, #118] +; NONEON-NOSVE-NEXT: ldrh w24, [sp, #116] +; NONEON-NOSVE-NEXT: ldrh w26, [sp, #114] +; NONEON-NOSVE-NEXT: ldrh w27, [sp, #112] +; NONEON-NOSVE-NEXT: ldrh w28, [sp, #126] +; NONEON-NOSVE-NEXT: mul w9, w9, w17 +; NONEON-NOSVE-NEXT: mul w21, w21, w22 +; NONEON-NOSVE-NEXT: ldrh w22, [sp, #86] +; NONEON-NOSVE-NEXT: lsr w14, w14, #16 +; NONEON-NOSVE-NEXT: mul w20, w20, w24 +; NONEON-NOSVE-NEXT: ldrh w24, [sp, #120] +; NONEON-NOSVE-NEXT: lsr w12, w12, #16 +; NONEON-NOSVE-NEXT: mul w19, w19, w26 +; NONEON-NOSVE-NEXT: ldrh w26, [sp, #124] +; NONEON-NOSVE-NEXT: lsr w10, w10, #16 +; NONEON-NOSVE-NEXT: mul w7, w7, w27 +; NONEON-NOSVE-NEXT: ldrh w27, [sp, #122] +; NONEON-NOSVE-NEXT: lsr w21, w21, #16 +; NONEON-NOSVE-NEXT: mul w5, w5, w28 +; NONEON-NOSVE-NEXT: lsr w20, w20, #16 +; NONEON-NOSVE-NEXT: lsr w9, w9, #16 +; NONEON-NOSVE-NEXT: mul w4, w4, w26 +; NONEON-NOSVE-NEXT: lsr w19, w19, #16 +; 
NONEON-NOSVE-NEXT: strh w21, [sp, #158] +; NONEON-NOSVE-NEXT: mul w2, w2, w27 +; NONEON-NOSVE-NEXT: lsr w7, w7, #16 +; NONEON-NOSVE-NEXT: strh w20, [sp, #156] +; NONEON-NOSVE-NEXT: mul w18, w18, w24 +; NONEON-NOSVE-NEXT: lsr w5, w5, #16 +; NONEON-NOSVE-NEXT: strh w19, [sp, #154] +; NONEON-NOSVE-NEXT: mul w16, w16, w22 +; NONEON-NOSVE-NEXT: lsr w4, w4, #16 +; NONEON-NOSVE-NEXT: strh w7, [sp, #152] +; NONEON-NOSVE-NEXT: lsr w2, w2, #16 +; NONEON-NOSVE-NEXT: strh w5, [sp, #150] +; NONEON-NOSVE-NEXT: lsr w18, w18, #16 +; NONEON-NOSVE-NEXT: strh w4, [sp, #148] +; NONEON-NOSVE-NEXT: lsr w16, w16, #16 +; NONEON-NOSVE-NEXT: strh w2, [sp, #146] +; NONEON-NOSVE-NEXT: strh w18, [sp, #144] +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #224] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w16, [sp, #142] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #208] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w14, [sp, #140] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #192] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w13, [sp, #138] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #176] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w12, [sp, #136] +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #160] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w11, [sp, #134] +; NONEON-NOSVE-NEXT: strh w10, [sp, #132] +; NONEON-NOSVE-NEXT: strh w9, [sp, #130] +; NONEON-NOSVE-NEXT: strh w8, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #240 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %1 = zext <16 x i16> %op1 to <16 x i32> @@ -651,6 +2124,22 @@ define <2 x i32> @umulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; SVE2-NEXT: umulh z0.s, z0.s, z1.s ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; 
NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w11, w10, [sp, #16] +; NONEON-NOSVE-NEXT: umull x9, w9, w10 +; NONEON-NOSVE-NEXT: umull x8, w8, w11 +; NONEON-NOSVE-NEXT: lsr x9, x9, #32 +; NONEON-NOSVE-NEXT: lsr x8, x8, #32 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %1 = zext <2 x i32> %op1 to <2 x i64> %2 = zext <2 x i32> %op2 to <2 x i64> %mul = mul <2 x i64> %1, %2 @@ -676,6 +2165,32 @@ define <4 x i32> @umulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; SVE2-NEXT: umulh z0.s, z0.s, z1.s ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-80]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w10, w11, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldp w13, w12, [sp, #48] +; NONEON-NOSVE-NEXT: umull x11, w11, w12 +; NONEON-NOSVE-NEXT: ldp w12, w14, [sp, #56] +; NONEON-NOSVE-NEXT: umull x10, w10, w13 +; NONEON-NOSVE-NEXT: lsr x11, x11, #32 +; NONEON-NOSVE-NEXT: umull x9, w9, w14 +; NONEON-NOSVE-NEXT: umull x8, w8, w12 +; NONEON-NOSVE-NEXT: lsr x10, x10, #32 +; NONEON-NOSVE-NEXT: lsr x9, x9, #32 +; NONEON-NOSVE-NEXT: stp w10, w11, [sp, #72] +; NONEON-NOSVE-NEXT: lsr x8, x8, #32 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #64] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %1 = zext <4 x i32> %op1 to <4 x i64> %2 = zext <4 x i32> %op2 to <4 x i64> %mul = mul <4 x i64> %1, %2 @@ -704,6 +2219,56 @@ define void @umulh_v8i32(ptr %a, ptr %b) { ; SVE2-NEXT: umulh z1.s, z2.s, z3.s ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: 
ret +; +; NONEON-NOSVE-LABEL: umulh_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #160 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x1] +; NONEON-NOSVE-NEXT: str q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q3, [sp] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: str q2, [sp, #64] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldp w10, w11, [sp, #48] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #96] +; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #104] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #64] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] +; NONEON-NOSVE-NEXT: ldp w17, w16, [sp, #112] +; NONEON-NOSVE-NEXT: umull x15, w15, w16 +; NONEON-NOSVE-NEXT: ldp w16, w18, [sp, #120] +; NONEON-NOSVE-NEXT: umull x14, w14, w17 +; NONEON-NOSVE-NEXT: ldp w17, w1, [sp, #80] +; NONEON-NOSVE-NEXT: umull x13, w13, w18 +; NONEON-NOSVE-NEXT: lsr x15, x15, #32 +; NONEON-NOSVE-NEXT: umull x12, w12, w16 +; NONEON-NOSVE-NEXT: lsr x14, x14, #32 +; NONEON-NOSVE-NEXT: ldp w16, w18, [sp, #88] +; NONEON-NOSVE-NEXT: umull x11, w11, w1 +; NONEON-NOSVE-NEXT: lsr x13, x13, #32 +; NONEON-NOSVE-NEXT: stp w14, w15, [sp, #152] +; NONEON-NOSVE-NEXT: umull x10, w10, w17 +; NONEON-NOSVE-NEXT: lsr x12, x12, #32 +; NONEON-NOSVE-NEXT: umull x9, w9, w18 +; NONEON-NOSVE-NEXT: umull x8, w8, w16 +; NONEON-NOSVE-NEXT: lsr x11, x11, #32 +; NONEON-NOSVE-NEXT: stp w12, w13, [sp, #144] +; NONEON-NOSVE-NEXT: lsr x10, x10, #32 +; NONEON-NOSVE-NEXT: lsr x9, x9, #32 +; NONEON-NOSVE-NEXT: lsr x8, x8, #32 +; NONEON-NOSVE-NEXT: stp w10, w11, [sp, #136] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; 
NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %insert = insertelement <8 x i64> undef, i64 32, i64 0 @@ -734,6 +2299,18 @@ define <1 x i64> @umulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; SVE2-NEXT: umulh z0.d, z0.d, z1.d ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: fmov x9, d1 +; NONEON-NOSVE-NEXT: umulh x8, x8, x9 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %1 = zext <1 x i64> %op1 to <1 x i128> %2 = zext <1 x i64> %op2 to <1 x i128> %mul = mul <1 x i128> %1, %2 @@ -759,6 +2336,21 @@ define <2 x i64> @umulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; SVE2-NEXT: umulh z0.d, z0.d, z1.d ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp] +; NONEON-NOSVE-NEXT: ldp x11, x10, [sp, #16] +; NONEON-NOSVE-NEXT: umulh x8, x8, x10 +; NONEON-NOSVE-NEXT: umulh x9, x9, x11 +; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %1 = zext <2 x i64> %op1 to <2 x i128> %2 = zext <2 x i64> %op2 to <2 x i128> %mul = mul <2 x i128> %1, %2 @@ -787,6 +2379,33 @@ define void @umulh_v4i64(ptr %a, ptr %b) { ; SVE2-NEXT: umulh z1.d, z2.d, z3.d ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #128 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q2, [sp] +; NONEON-NOSVE-NEXT: ldp x11, x10, [sp] +; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x13, x12, [sp, #16] +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp, #32] +; NONEON-NOSVE-NEXT: umulh x10, x10, x12 +; NONEON-NOSVE-NEXT: ldp x14, x12, [sp, #48] +; NONEON-NOSVE-NEXT: umulh x11, x11, x13 +; NONEON-NOSVE-NEXT: umulh x8, x8, x12 +; NONEON-NOSVE-NEXT: umulh x9, x9, x14 +; NONEON-NOSVE-NEXT: stp x11, x10, [sp, #64] +; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #80] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %1 = zext <4 x i64> %op1 to <4 x i128> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll index 1123907f33899..7bdb4599707b0 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -17,6 +18,29 @@ define i8 @uaddv_v8i8(<8 x i8> %a) { ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #9] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #14] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: add w12, w13, w12 +; NONEON-NOSVE-NEXT: add w10, w11, w10 +; NONEON-NOSVE-NEXT: add w10, w12, w10 +; NONEON-NOSVE-NEXT: add w8, w8, w14 +; NONEON-NOSVE-NEXT: add w8, w10, w8 +; NONEON-NOSVE-NEXT: add w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a) ret i8 %res } @@ -30,6 +54,44 @@ define i8 @uaddv_v16i8(<16 x i8> %a) { ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1] +; NONEON-NOSVE-NEXT: ldrb w11, [sp] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #4] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #3] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #2] +; NONEON-NOSVE-NEXT: add w10, w11, w10 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #8] +; NONEON-NOSVE-NEXT: add w11, w14, w13 +; NONEON-NOSVE-NEXT: add w9, w12, w9 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #6] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #7] +; NONEON-NOSVE-NEXT: add w10, w10, w11 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #13] +; NONEON-NOSVE-NEXT: add w9, w9, w16 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #9] +; NONEON-NOSVE-NEXT: add w12, w12, w15 +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #14] +; NONEON-NOSVE-NEXT: add w8, w13, w8 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #15] +; NONEON-NOSVE-NEXT: add w12, w12, w14 +; NONEON-NOSVE-NEXT: add w8, w8, w11 +; NONEON-NOSVE-NEXT: add w9, w10, w9 +; NONEON-NOSVE-NEXT: add w10, w12, w16 +; NONEON-NOSVE-NEXT: add w8, w8, w15 +; NONEON-NOSVE-NEXT: add w9, w9, w10 +; NONEON-NOSVE-NEXT: add w8, w8, w13 +; NONEON-NOSVE-NEXT: add w0, w9, w8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a) ret i8 %res } @@ -44,6 +106,77 @@ define i8 @uaddv_v32i8(ptr %a) { ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w11, [sp] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #2] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #3] +; NONEON-NOSVE-NEXT: add w9, w11, w10 +; NONEON-NOSVE-NEXT: add w10, w13, w12 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #5] +; NONEON-NOSVE-NEXT: add w11, w15, w14 +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #4] +; NONEON-NOSVE-NEXT: add w9, w10, w11 +; NONEON-NOSVE-NEXT: add w10, w13, w12 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #6] +; NONEON-NOSVE-NEXT: add w14, w17, w16 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #8] +; NONEON-NOSVE-NEXT: add w10, w14, w10 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #7] +; NONEON-NOSVE-NEXT: add w11, w12, w11 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #24] +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: add w9, w10, w11 +; NONEON-NOSVE-NEXT: add w10, w14, w13 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #9] +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: add w11, w15, w12 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #26] +; NONEON-NOSVE-NEXT: add w9, w10, w11 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #10] +; NONEON-NOSVE-NEXT: add w10, w13, w12 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #12] +; NONEON-NOSVE-NEXT: add w9, w9, w10 +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #14] +; NONEON-NOSVE-NEXT: add w11, w12, w11 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #15] +; 
NONEON-NOSVE-NEXT: add w10, w13, w10 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #13] +; NONEON-NOSVE-NEXT: add w14, w15, w14 +; NONEON-NOSVE-NEXT: add w10, w11, w10 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #30] +; NONEON-NOSVE-NEXT: add w9, w9, w14 +; NONEON-NOSVE-NEXT: add w12, w13, w12 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #31] +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: add w10, w10, w12 +; NONEON-NOSVE-NEXT: add w11, w16, w11 +; NONEON-NOSVE-NEXT: add w10, w10, w11 +; NONEON-NOSVE-NEXT: add w11, w17, w13 +; NONEON-NOSVE-NEXT: add w9, w10, w11 +; NONEON-NOSVE-NEXT: add w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %op) ret i8 %res @@ -58,6 +191,21 @@ define i16 @uaddv_v4i16(<4 x i16> %a) { ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #8] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: add w10, w11, w10 +; NONEON-NOSVE-NEXT: add w0, w10, w8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a) ret i16 %res } @@ -71,6 +219,28 @@ define i16 @uaddv_v8i16(<8 x i16> %a) { ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w13, [sp] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: add w12, w13, w12 +; NONEON-NOSVE-NEXT: add w10, w11, w10 +; NONEON-NOSVE-NEXT: add w10, w12, w10 +; NONEON-NOSVE-NEXT: add w8, w8, w14 +; NONEON-NOSVE-NEXT: add w8, w10, w8 +; NONEON-NOSVE-NEXT: add w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a) ret i16 %res } @@ -85,6 +255,45 @@ define i16 @uaddv_v16i16(ptr %a) { ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w11, [sp] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #4] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #6] +; NONEON-NOSVE-NEXT: add w9, w11, w10 +; NONEON-NOSVE-NEXT: add w12, w13, w12 +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #10] +; NONEON-NOSVE-NEXT: add w13, w15, w14 +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #24] +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #8] +; NONEON-NOSVE-NEXT: add w9, w12, w13 +; NONEON-NOSVE-NEXT: add w10, w11, w10 +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #12] +; NONEON-NOSVE-NEXT: add w14, w17, w16 +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #14] +; NONEON-NOSVE-NEXT: add w10, w14, w10 +; NONEON-NOSVE-NEXT: add w11, w15, w11 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: add w9, w10, w11 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: add w9, w13, w12 +; NONEON-NOSVE-NEXT: add w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %op) ret i16 %res @@ -99,6 +308,16 @@ define i32 @uaddv_v2i32(<2 x i32> %a) { ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: add w0, w9, w8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.add.v2i32(<2 x 
i32> %a) ret i32 %res } @@ -112,6 +331,17 @@ define i32 @uaddv_v4i32(<4 x i32> %a) { ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w11, w10, [sp], #16 +; NONEON-NOSVE-NEXT: add w10, w11, w10 +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: add w0, w10, w8 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a) ret i32 %res } @@ -126,6 +356,25 @@ define i32 @uaddv_v8i32(ptr %a) { ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w11, w10, [sp] +; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #24] +; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #8] +; NONEON-NOSVE-NEXT: add w8, w10, w8 +; NONEON-NOSVE-NEXT: add w9, w11, w9 +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: add w10, w14, w12 +; NONEON-NOSVE-NEXT: add w11, w15, w13 +; NONEON-NOSVE-NEXT: add w9, w10, w11 +; NONEON-NOSVE-NEXT: add w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %op) ret i32 %res @@ -139,6 +388,14 @@ define i64 @uaddv_v2i64(<2 x i64> %a) { ; CHECK-NEXT: uaddv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp], #16 +; NONEON-NOSVE-NEXT: add x0, x9, x8 +; NONEON-NOSVE-NEXT: ret %res = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a) ret i64 %res } @@ -152,6 +409,18 @@ define i64 @uaddv_v4i64(ptr %a) { ; CHECK-NEXT: uaddv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp, #16] +; NONEON-NOSVE-NEXT: ldp x11, x10, [sp], #32 +; NONEON-NOSVE-NEXT: add x8, x10, x8 +; NONEON-NOSVE-NEXT: add x9, x11, x9 +; NONEON-NOSVE-NEXT: add x0, x9, x8 +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %op) ret i64 %res @@ -169,6 +438,36 @@ define i8 @smaxv_v8i8(<8 x i8> %a) { ; CHECK-NEXT: smaxv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #9] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, 
gt +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, gt +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %a) ret i8 %res } @@ -181,6 +480,59 @@ define i8 @smaxv_v16i8(<16 x i8> %a) { ; CHECK-NEXT: smaxv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #1] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp] +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #3] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #5] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #7] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, gt +; 
NONEON-NOSVE-NEXT: ldrsb w10, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, gt +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, gt +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %a) ret i8 %res } @@ -194,6 +546,108 @@ define i8 @smaxv_v32i8(ptr %a) { ; CHECK-NEXT: smaxv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #1] +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #18] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #19] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #3] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #20] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #21] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #5] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: cmp 
w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #23] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #7] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #24] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #25] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #9] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #27] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #11] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #28] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #29] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #13] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsb 
w10, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #31] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, gt +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %op) ret i8 %res @@ -207,6 +661,24 @@ define i16 @smaxv_v4i16(<4 x i16> %a) { ; CHECK-NEXT: smaxv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, gt +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, gt +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %a) ret i16 %res } @@ -219,6 +691,35 @@ define i16 @smaxv_v8i16(<8 x i16> %a) { ; CHECK-NEXT: smaxv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #2] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp] +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, gt +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, gt +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, gt +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, gt +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %a) ret i16 %res } @@ -232,6 +733,60 @@ define i16 @smaxv_v16i16(ptr %a) { ; CHECK-NEXT: smaxv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #2] +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsh w11, [sp] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #20] +; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #24] +; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #28] +; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, gt +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call i16 
@llvm.vector.reduce.smax.v16i16(<16 x i16> %op) ret i16 %res @@ -245,6 +800,17 @@ define i32 @smaxv_v2i32(<2 x i32> %a) { ; CHECK-NEXT: smaxv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w0, w9, w8, gt +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %a) ret i32 %res } @@ -257,6 +823,21 @@ define i32 @smaxv_v4i32(<4 x i32> %a) { ; CHECK-NEXT: smaxv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, gt +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, gt +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a) ret i32 %res } @@ -270,6 +851,32 @@ define i32 @smaxv_v8i32(ptr %a) { ; CHECK-NEXT: smaxv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp w11, w8, [sp] +; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, gt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, gt +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldp w10, w12, [sp, #8] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, gt +; NONEON-NOSVE-NEXT: ldp w11, w9, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w10, w11 +; NONEON-NOSVE-NEXT: csel w10, w10, w11, gt +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, gt +; NONEON-NOSVE-NEXT: cmp w12, w9 +; NONEON-NOSVE-NEXT: csel w9, w12, w9, gt +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, gt +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %op) ret i32 %res @@ -284,6 +891,15 @@ define i64 @smaxv_v2i64(<2 x i64> %a) { ; CHECK-NEXT: smaxv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp], #16 +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x0, x9, x8, gt +; NONEON-NOSVE-NEXT: ret %res = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %a) ret i64 %res } @@ -297,6 +913,22 @@ define i64 @smaxv_v4i64(ptr %a) { ; CHECK-NEXT: smaxv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp x8, x10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr x11, [sp], #32 +; NONEON-NOSVE-NEXT: cmp x8, x9 +; NONEON-NOSVE-NEXT: csel x8, x8, x9, gt +; NONEON-NOSVE-NEXT: cmp x11, x10 +; NONEON-NOSVE-NEXT: csel x9, x11, x10, gt +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x0, x9, x8, gt +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %op) ret i64 %res @@ -314,6 +946,36 @@ define i8 @sminv_v8i8(<8 x i8> %a) { ; CHECK-NEXT: sminv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #9] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, lt +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %a) ret i8 %res } @@ -326,6 +988,59 @@ define i8 @sminv_v16i8(<16 x i8> %a) { ; CHECK-NEXT: sminv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; 
NONEON-NOSVE-LABEL: sminv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #1] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp] +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #3] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #5] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #7] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, lt +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; 
NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %a) ret i8 %res } @@ -339,6 +1054,108 @@ define i8 @sminv_v32i8(ptr %a) { ; CHECK-NEXT: sminv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #1] +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #18] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #19] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #3] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #20] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #21] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #5] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #23] +; 
NONEON-NOSVE-NEXT: ldrsb w11, [sp, #7] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #24] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #25] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #9] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #27] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #11] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #28] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #29] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #13] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #31] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; 
NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, lt +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %op) ret i8 %res @@ -352,6 +1169,24 @@ define i16 @sminv_v4i16(<4 x i16> %a) { ; CHECK-NEXT: sminv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, lt +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %a) ret i16 %res } @@ -364,6 +1199,35 @@ define i16 @sminv_v8i16(<8 x i16> %a) { ; CHECK-NEXT: sminv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #2] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp] +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, lt +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %a) ret i16 %res } @@ -377,6 +1241,60 @@ define i16 @sminv_v16i16(ptr %a) { ; CHECK-NEXT: sminv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #2] +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsh w11, [sp] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #20] +; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #24] +; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #28] +; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, lt +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call i16 
@llvm.vector.reduce.smin.v16i16(<16 x i16> %op) ret i16 %res @@ -390,6 +1308,17 @@ define i32 @sminv_v2i32(<2 x i32> %a) { ; CHECK-NEXT: sminv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w0, w9, w8, lt +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> %a) ret i32 %res } @@ -402,6 +1331,21 @@ define i32 @sminv_v4i32(<4 x i32> %a) { ; CHECK-NEXT: sminv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, lt +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %a) ret i32 %res } @@ -415,6 +1359,32 @@ define i32 @sminv_v8i32(ptr %a) { ; CHECK-NEXT: sminv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp w11, w8, [sp] +; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldp w10, w12, [sp, #8] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt +; NONEON-NOSVE-NEXT: ldp w11, w9, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w10, w11 +; NONEON-NOSVE-NEXT: csel w10, w10, w11, lt +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt +; NONEON-NOSVE-NEXT: cmp w12, w9 +; NONEON-NOSVE-NEXT: csel w9, w12, w9, lt +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, lt +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %op) ret i32 %res @@ -429,6 +1399,15 @@ define i64 @sminv_v2i64(<2 x i64> %a) { ; CHECK-NEXT: sminv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp], #16 +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x0, x9, x8, lt +; NONEON-NOSVE-NEXT: ret %res = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %a) ret i64 %res } @@ -442,6 +1421,22 @@ define i64 @sminv_v4i64(ptr %a) { ; CHECK-NEXT: sminv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp x8, x10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr x11, [sp], #32 +; NONEON-NOSVE-NEXT: cmp x8, x9 +; NONEON-NOSVE-NEXT: csel x8, x8, x9, lt +; NONEON-NOSVE-NEXT: cmp x11, x10 +; NONEON-NOSVE-NEXT: csel x9, x11, x10, lt +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x0, x9, x8, lt +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %op) ret i64 %res @@ -459,6 +1454,36 @@ define i8 @umaxv_v8i8(<8 x i8> %a) { ; CHECK-NEXT: umaxv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, hi +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %a) ret i8 %res } @@ -471,6 +1496,59 @@ define i8 @umaxv_v16i8(<16 x i8> %a) { ; CHECK-NEXT: umaxv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; 
NONEON-NOSVE-LABEL: umaxv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, hi +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret 
%res = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %a) ret i8 %res } @@ -484,6 +1562,108 @@ define i8 @umaxv_v32i8(ptr %a) { ; CHECK-NEXT: umaxv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w11, [sp] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #3] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #5] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #7] +; 
NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #9] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #11] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #13] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp 
w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, hi +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %op) ret i8 %res @@ -497,6 +1677,24 @@ define i16 @umaxv_v4i16(<4 x i16> %a) { ; CHECK-NEXT: umaxv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, hi +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %a) ret i16 %res } @@ -509,6 +1707,35 @@ define i16 @umaxv_v8i16(<8 x i16> %a) { ; CHECK-NEXT: umaxv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, hi +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %a) ret i16 %res } @@ -522,6 +1749,60 @@ define i16 @umaxv_v16i16(ptr %a) { ; CHECK-NEXT: umaxv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w11, [sp] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #24] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, hi +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call i16 
@llvm.vector.reduce.umax.v16i16(<16 x i16> %op) ret i16 %res @@ -535,6 +1816,17 @@ define i32 @umaxv_v2i32(<2 x i32> %a) { ; CHECK-NEXT: umaxv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w0, w9, w8, hi +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %a) ret i32 %res } @@ -547,6 +1839,21 @@ define i32 @umaxv_v4i32(<4 x i32> %a) { ; CHECK-NEXT: umaxv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, hi +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %a) ret i32 %res } @@ -560,6 +1867,32 @@ define i32 @umaxv_v8i32(ptr %a) { ; CHECK-NEXT: umaxv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp w11, w8, [sp] +; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldp w10, w12, [sp, #8] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: ldp w11, w9, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w10, w11 +; NONEON-NOSVE-NEXT: csel w10, w10, w11, hi +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi +; NONEON-NOSVE-NEXT: cmp w12, w9 +; NONEON-NOSVE-NEXT: csel w9, w12, w9, hi +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, hi +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %op) ret i32 %res @@ -574,6 +1907,15 @@ define i64 @umaxv_v2i64(<2 x i64> %a) { ; CHECK-NEXT: umaxv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp], #16 +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x0, x9, x8, hi +; NONEON-NOSVE-NEXT: ret %res = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a) ret i64 %res } @@ -587,6 +1929,22 @@ define i64 @umaxv_v4i64(ptr %a) { ; CHECK-NEXT: umaxv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp x8, x10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr x11, [sp], #32 +; NONEON-NOSVE-NEXT: cmp x8, x9 +; NONEON-NOSVE-NEXT: csel x8, x8, x9, hi +; NONEON-NOSVE-NEXT: cmp x11, x10 +; NONEON-NOSVE-NEXT: csel x9, x11, x10, hi +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x0, x9, x8, hi +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %op) ret i64 %res @@ -604,6 +1962,36 @@ define i8 @uminv_v8i8(<8 x i8> %a) { ; CHECK-NEXT: uminv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, lo +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %a) ret i8 %res } @@ -616,6 +2004,59 @@ define i8 @uminv_v16i8(<16 x i8> %a) { ; CHECK-NEXT: uminv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; 
NONEON-NOSVE-LABEL: uminv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, lo +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret 
%res = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %a) ret i8 %res } @@ -629,6 +2070,108 @@ define i8 @uminv_v32i8(ptr %a) { ; CHECK-NEXT: uminv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w11, [sp] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #2] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #3] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #5] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #7] +; 
NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #9] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #11] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #13] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #15] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp 
w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, lo +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %op) ret i8 %res @@ -642,6 +2185,24 @@ define i16 @uminv_v4i16(<4 x i16> %a) { ; CHECK-NEXT: uminv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, lo +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %a) ret i16 %res } @@ -654,6 +2215,35 @@ define i16 @uminv_v8i16(<8 x i16> %a) { ; CHECK-NEXT: uminv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, lo +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %a) ret i16 %res } @@ -667,6 +2257,60 @@ define i16 @uminv_v16i16(ptr %a) { ; CHECK-NEXT: uminv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w11, [sp] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #24] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #10] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #14] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, lo +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call i16 
@llvm.vector.reduce.umin.v16i16(<16 x i16> %op) ret i16 %res @@ -680,6 +2324,17 @@ define i32 @uminv_v2i32(<2 x i32> %a) { ; CHECK-NEXT: uminv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w0, w9, w8, lo +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %a) ret i32 %res } @@ -692,6 +2347,21 @@ define i32 @uminv_v4i32(<4 x i32> %a) { ; CHECK-NEXT: uminv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, lo +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a) ret i32 %res } @@ -705,6 +2375,32 @@ define i32 @uminv_v8i32(ptr %a) { ; CHECK-NEXT: uminv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp w11, w8, [sp] +; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldp w10, w12, [sp, #8] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: ldp w11, w9, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w10, w11 +; NONEON-NOSVE-NEXT: csel w10, w10, w11, lo +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo +; NONEON-NOSVE-NEXT: cmp w12, w9 +; NONEON-NOSVE-NEXT: csel w9, w12, w9, lo +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w0, w8, w9, lo +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %op) ret i32 %res @@ -719,6 +2415,15 @@ define i64 @uminv_v2i64(<2 x i64> %a) { ; CHECK-NEXT: uminv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp], #16 +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x0, x9, x8, lo +; NONEON-NOSVE-NEXT: ret %res = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %a) ret i64 %res } @@ -732,6 +2437,22 @@ define i64 @uminv_v4i64(ptr %a) { ; CHECK-NEXT: uminv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp x8, x10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr x11, [sp], #32 +; NONEON-NOSVE-NEXT: cmp x8, x9 +; NONEON-NOSVE-NEXT: csel x8, x8, x9, lo +; NONEON-NOSVE-NEXT: cmp x11, x10 +; NONEON-NOSVE-NEXT: csel x9, x11, x10, lo +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x0, x9, x8, lo +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %op) ret i64 %res diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll index 4ae7586fca169..cb1fb20ec9d8d 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -24,6 +25,35 @@ define <4 x i8> @srem_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #20] +; NONEON-NOSVE-NEXT: ldrsb w12, [sp, #12] +; NONEON-NOSVE-NEXT: ldrsb w14, [sp, #18] +; NONEON-NOSVE-NEXT: ldrsb w15, [sp, #10] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w17, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsb w18, [sp, 
#8] +; NONEON-NOSVE-NEXT: sdiv w13, w12, w11 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: sdiv w16, w15, w14 +; NONEON-NOSVE-NEXT: msub w9, w13, w11, w12 +; NONEON-NOSVE-NEXT: strh w9, [sp, #28] +; NONEON-NOSVE-NEXT: sdiv w0, w18, w17 +; NONEON-NOSVE-NEXT: msub w10, w16, w14, w15 +; NONEON-NOSVE-NEXT: strh w10, [sp, #26] +; NONEON-NOSVE-NEXT: msub w8, w0, w17, w18 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = srem <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -53,6 +83,55 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 
+; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = srem <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -102,6 +181,94 @@ define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: mls z0.b, p0/m, z3.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; 
NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = srem <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -189,6 +356,179 @@ define void @srem_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: mls z2.b, p0/m, z7.b, z4.b ; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: 
ldrsb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #47] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #62] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #61] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #60] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #59] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #58] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #57] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #56] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #55] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #54] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, 
w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #53] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #52] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #51] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #50] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #49] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #48] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28] +; 
NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #2] +; NONEON-NOSVE-NEXT: 
strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = srem <32 x i8> %op1, %op2 @@ -210,6 +550,35 @@ define <4 x i16> @srem_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, 
w8, w9 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = srem <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -238,6 +607,54 @@ define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: mls z0.h, p0/m, z3.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp] 
+; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = srem <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -282,6 +699,99 @@ define void @srem_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: mls z0.h, p0/m, z7.h, z1.h ; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #46] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #60] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #58] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #56] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #54] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #52] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; 
NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #50] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #48] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: 
ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = srem <16 x i16> %op1, %op2 @@ -300,6 +810,24 @@ define <2 x i32> @srem_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w11, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: sdiv w10, w11, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w11 +; NONEON-NOSVE-NEXT: str w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: str w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = srem <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -315,6 +843,32 @@ define <4 x i32> @srem_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w11, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: sdiv w10, w11, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w11 +; NONEON-NOSVE-NEXT: str w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #4] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #36] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: str w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = srem <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -334,6 +888,54 @@ define void @srem_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: mls z1.s, p0/m, z5.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w11, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; NONEON-NOSVE-NEXT: sdiv w10, w11, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w11 +; NONEON-NOSVE-NEXT: str w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #36] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #84] +; NONEON-NOSVE-NEXT: 
ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #76] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldp w9, w11, [sp, #4] +; NONEON-NOSVE-NEXT: sdiv w10, w11, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w11 +; NONEON-NOSVE-NEXT: str w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr w9, [sp] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = srem <8 x i32> %op1, %op2 @@ -352,6 +954,19 @@ define <1 x i64> @srem_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: sdiv x10, x9, x8 +; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = srem <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -367,6 +982,23 @@ define <2 x i64> @srem_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
srem_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x11, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: sdiv x10, x11, x8 +; NONEON-NOSVE-NEXT: msub x8, x10, x8, x11 +; NONEON-NOSVE-NEXT: str x8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv x10, x9, x8 +; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9 +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = srem <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -386,6 +1018,37 @@ define void @srem_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: mls z1.d, p0/m, z5.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x9, x11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #56] +; NONEON-NOSVE-NEXT: sdiv x10, x11, x8 +; NONEON-NOSVE-NEXT: msub x8, x10, x8, x11 +; NONEON-NOSVE-NEXT: str x8, [sp, #88] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: sdiv x10, x9, x8 +; NONEON-NOSVE-NEXT: msub x11, x10, x8, x9 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #8] +; NONEON-NOSVE-NEXT: sdiv x10, x9, x8 +; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9 +; NONEON-NOSVE-NEXT: ldr x9, [sp] +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #72] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: sdiv x10, x9, x8 +; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9 +; NONEON-NOSVE-NEXT: str x8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret 
%op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = srem <4 x i64> %op1, %op2 @@ -413,6 +1076,35 @@ define <4 x i8> @urem_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #10] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w18, [sp, #8] +; NONEON-NOSVE-NEXT: udiv w13, w12, w11 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: udiv w16, w15, w14 +; NONEON-NOSVE-NEXT: msub w9, w13, w11, w12 +; NONEON-NOSVE-NEXT: strh w9, [sp, #28] +; NONEON-NOSVE-NEXT: udiv w0, w18, w17 +; NONEON-NOSVE-NEXT: msub w10, w16, w14, w15 +; NONEON-NOSVE-NEXT: strh w10, [sp, #26] +; NONEON-NOSVE-NEXT: msub w8, w0, w17, w18 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = urem <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -442,6 +1134,55 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub 
w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = urem <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -491,6 +1232,94 @@ define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: mls z0.b, p0/m, z3.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: udiv w10, 
w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = urem <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -578,6 +1407,179 @@ define void @urem_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: mls z2.b, p0/m, z7.b, z4.b ; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63] +; NONEON-NOSVE-NEXT: 
ldrb w9, [sp, #47] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #62] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #60] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #59] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #58] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #56] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #54] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #37] +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #52] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #50] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #48] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, 
w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: udiv w10, 
w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = urem <32 x i8> %op1, %op2 @@ -599,6 +1601,35 @@ define <4 x i16> @urem_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; 
NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = urem <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -627,6 +1658,54 @@ define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: mls z0.h, p0/m, z3.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: 
udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = urem <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -671,6 +1750,99 @@ define void @urem_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: mls z0.h, p0/m, z7.h, z1.h ; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #46] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; 
NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; 
NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = urem <16 x i16> %op1, %op2 @@ -689,6 +1861,24 @@ define <2 x i32> @urem_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w11, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: udiv w10, w11, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w11 +; NONEON-NOSVE-NEXT: str w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: str w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = urem <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -704,6 +1894,32 @@ define <4 x i32> @urem_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w11, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: udiv w10, w11, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w11 +; NONEON-NOSVE-NEXT: str w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #4] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #36] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: str w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = urem <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -723,6 +1939,54 @@ define void @urem_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: mls z1.s, p0/m, z5.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w11, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; NONEON-NOSVE-NEXT: udiv w10, w11, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w11 +; NONEON-NOSVE-NEXT: str w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #36] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #84] +; 
NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #76] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldp w9, w11, [sp, #4] +; NONEON-NOSVE-NEXT: udiv w10, w11, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w11 +; NONEON-NOSVE-NEXT: str w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w10, w8, w9 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr w9, [sp] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = urem <8 x i32> %op1, %op2 @@ -741,6 +2005,19 @@ define <1 x i64> @urem_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: udiv x10, x9, x8 +; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = urem <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -756,6 +2033,23 @@ define <2 x i64> @urem_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; 
NONEON-NOSVE-LABEL: urem_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x11, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: udiv x10, x11, x8 +; NONEON-NOSVE-NEXT: msub x8, x10, x8, x11 +; NONEON-NOSVE-NEXT: str x8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv x10, x9, x8 +; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9 +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = urem <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -775,6 +2069,37 @@ define void @urem_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: mls z1.d, p0/m, z5.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x9, x11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #56] +; NONEON-NOSVE-NEXT: udiv x10, x11, x8 +; NONEON-NOSVE-NEXT: msub x8, x10, x8, x11 +; NONEON-NOSVE-NEXT: str x8, [sp, #88] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: udiv x10, x9, x8 +; NONEON-NOSVE-NEXT: msub x11, x10, x8, x9 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #8] +; NONEON-NOSVE-NEXT: udiv x10, x9, x8 +; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9 +; NONEON-NOSVE-NEXT: ldr x9, [sp] +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #72] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: udiv x10, x9, x8 +; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9 +; NONEON-NOSVE-NEXT: str x8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; 
NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = urem <4 x i64> %op1, %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll index bfffe4b6315d7..5cee1360f6f3c 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -16,6 +17,32 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, 
sp, #32 +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <4 x i8> %op1, <4 x i8> %op2 ret <4 x i8> %sel } @@ -31,6 +58,48 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <8 x i8> %op1, <8 x 
i8> %op2 ret <8 x i8> %sel } @@ -46,6 +115,79 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; 
NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <16 x i8> %op1, <16 x i8> %op2 ret <16 x i8> %sel } @@ -64,6 +206,151 @@ define void @select_v32i8(ptr %a, ptr %b, i1 %mask) { ; CHECK-NEXT: sel z1.b, p0, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: tst w2, #0x1 +; NONEON-NOSVE-NEXT: ldr q2, [x1] +; NONEON-NOSVE-NEXT: ldr q3, [x1, #16] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: stp q1, q3, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #47] +; NONEON-NOSVE-NEXT: str q2, [sp, #48] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; 
NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: 
ldrb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #63] +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #62] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #61] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #60] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #59] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #58] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #57] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #56] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #55] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #54] +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #53] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #52] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #51] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #50] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #49] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #48] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load volatile <32 x i8>, ptr %a %op2 = load volatile <32 x i8>, ptr %b %sel = select i1 %mask, <32 x i8> %op1, <32 x i8> %op2 @@ -83,6 +370,22 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: csel w11, w10, w8, ne +; 
NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <2 x i16> %op1, <2 x i16> %op2 ret <2 x i16> %sel } @@ -99,6 +402,32 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <4 x i16> %op1, <4 x i16> %op2 ret <4 x i16> %sel } @@ -115,6 +444,47 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <8 x i16> %op1, <8 x i16> %op2 ret <8 x i16> %sel } @@ -134,6 +504,87 @@ define void @select_v16i16(ptr %a, ptr %b, i1 %mask) { ; CHECK-NEXT: sel z1.h, p0, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: tst w2, #0x1 +; 
NONEON-NOSVE-NEXT: ldr q2, [x1] +; NONEON-NOSVE-NEXT: ldr q3, [x1, #16] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: stp q1, q3, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #46] +; NONEON-NOSVE-NEXT: str q2, [sp, #48] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #62] +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #60] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #58] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrh w9, 
[sp, #56] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #54] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #52] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #50] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #48] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load volatile <16 x i16>, ptr %a %op2 = load volatile <16 x i16>, ptr %b %sel = select i1 %mask, <16 x i16> %op1, <16 x i16> %op2 @@ -153,6 +604,22 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: csel w11, w10, w8, ne +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <2 x i32> %op1, <2 x i32> %op2 ret <2 x i32> %sel } @@ -169,6 +636,27 @@ 
define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csel w11, w10, w8, ne +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: csel w11, w10, w8, ne +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, ne +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <4 x i32> %op1, <4 x i32> %op2 ret <4 x i32> %sel } @@ -188,6 +676,47 @@ define void @select_v8i32(ptr %a, ptr %b, i1 %mask) { ; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: tst w2, #0x1 +; NONEON-NOSVE-NEXT: ldr q2, [x1] +; NONEON-NOSVE-NEXT: ldr q3, [x1, #16] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: stp q1, q3, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: str q2, [sp, #48] +; NONEON-NOSVE-NEXT: csel w11, w8, w10, ne +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #72] +; 
NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: csel w11, w8, w10, ne +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #56] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #64] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: csel w11, w8, w10, ne +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: csel w11, w8, w10, ne +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: csel w8, w8, w9, ne +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load volatile <8 x i32>, ptr %a %op2 = load volatile <8 x i32>, ptr %b %sel = select i1 %mask, <8 x i32> %op1, <8 x i32> %op2 @@ -208,6 +737,19 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, ne +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <1 x i64> %op1, <1 x i64> %op2 ret <1 x i64> %sel } @@ -225,6 +767,21 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2i64: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: csel x11, x10, x8, ne +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: csel x8, x9, x8, ne +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <2 x i64> %op1, <2 x i64> %op2 ret <2 x i64> %sel } @@ -245,6 +802,35 @@ define void @select_v4i64(ptr %a, ptr %b, i1 %mask) { ; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: tst w2, #0x1 +; NONEON-NOSVE-NEXT: ldr q2, [x1] +; NONEON-NOSVE-NEXT: ldr q3, [x1, #16] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: stp q1, q3, [sp, #16] +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: str q2, [sp, #48] +; NONEON-NOSVE-NEXT: csel x11, x8, x10, ne +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: csel x8, x8, x9, ne +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp, #48] +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #64] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: csel x11, x8, x10, ne +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: csel x8, x8, x9, ne +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load volatile <4 x i64>, ptr %a %op2 = load volatile <4 x i64>, ptr %b %sel = select i1 %mask, <4 x i64> %op1, <4 x i64> %op2 diff --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll index 9319bd69c25fb..2778e93416a74 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -19,6 +20,31 @@ define <4 x i8> @ashr_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #20] +; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #18] +; NONEON-NOSVE-NEXT: ldrsb w13, [sp, #10] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsb w14, [sp, #8] +; NONEON-NOSVE-NEXT: asr w10, w11, w10 +; NONEON-NOSVE-NEXT: asr w11, w13, w12 +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: asr w8, w14, w9 +; NONEON-NOSVE-NEXT: strh w10, [sp, #28] +; NONEON-NOSVE-NEXT: strh w11, [sp, #26] +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = ashr <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -32,6 +58,47 @@ define <8 x i8> @ashr_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: 
asr z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = ashr <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -45,6 +112,78 @@ define <16 x i8> @ashr_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v16i8: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: 
ldrsb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = ashr <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -60,6 +199,147 @@ define void @ashr_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: asr z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #47] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #62] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #60] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, 
#59] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #58] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #56] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #54] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #52] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #50] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #48] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: 
ldrsb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; 
NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = ashr <32 x i8> %op1, %op2 @@ -78,6 +358,22 @@ define <2 x i16> @ashr_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #8] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: asr w9, w11, w10 +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = ashr <2 x i16> %op1, %op2 ret <2 x i16> %res } @@ -91,6 +387,31 @@ define <4 x i16> @ashr_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: asr 
w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = ashr <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -104,6 +425,46 @@ define <8 x i16> @ashr_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, 
[sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = ashr <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -119,6 +480,83 @@ define void @ashr_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: asr z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #46] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; 
NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrsh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = ashr <16 x i16> %op1, %op2 @@ -135,6 +573,21 @@ define <2 x i32> @ashr_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v2i32: +; 
NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: asr w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = ashr <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -148,6 +601,26 @@ define <4 x i32> @ashr_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: asr w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: asr w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = ashr <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -163,6 +636,43 @@ define void @ashr_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: asr z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, 
q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; NONEON-NOSVE-NEXT: asr w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: asr w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: asr w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: asr w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: asr w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = ashr <8 x i32> %op1, %op2 @@ -179,6 +689,18 @@ define <1 x i64> @ashr_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: asr x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = ashr <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -192,6 +714,20 @@ define <2 x i64> @ashr_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: asr z0.d, p0/m, z0.d, 
z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: asr x11, x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: asr x8, x9, x8 +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = ashr <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -207,6 +743,31 @@ define void @ashr_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: asr z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #56] +; NONEON-NOSVE-NEXT: asr x11, x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: asr x8, x9, x8 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: asr x11, x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: asr x8, x9, x8 +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = ashr <4 x i64> %op1, %op2 @@ -229,6 +790,31 @@ define <4 x i8> @lshr_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; 
CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #10] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #8] +; NONEON-NOSVE-NEXT: lsr w10, w11, w10 +; NONEON-NOSVE-NEXT: lsr w11, w13, w12 +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: lsr w8, w14, w9 +; NONEON-NOSVE-NEXT: strh w10, [sp, #28] +; NONEON-NOSVE-NEXT: strh w11, [sp, #26] +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = lshr <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -242,6 +828,47 @@ define <8 x i8> @lshr_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; 
NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = lshr <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -255,6 +882,78 @@ define <16 x i8> @lshr_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] 
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = lshr <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -270,6 +969,147 @@ define void @lshr_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: lsr z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #47] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #62] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #60] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #59] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb 
w9, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #58] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #56] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #54] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #52] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #50] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #48] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: 
ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, 
[sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = lshr <32 x i8> %op1, %op2 @@ -288,6 +1128,22 @@ define <2 x i16> @lshr_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #8] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: lsr w9, w11, w10 +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = lshr <2 x i16> %op1, %op2 ret <2 x i16> %res } @@ -301,6 +1157,31 @@ define <4 x i16> @lshr_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; 
NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = lshr <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -314,6 +1195,46 @@ define <8 x i16> @lshr_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; 
NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = lshr <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -329,6 +1250,83 @@ define void @lshr_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: lsr z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #46] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh 
w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = lshr <16 x i16> %op1, %op2 @@ -345,6 +1343,21 @@ define <2 x i32> @lshr_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; 
NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsr w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = lshr <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -358,6 +1371,26 @@ define <4 x i32> @lshr_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsr w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsr w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = lshr <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -373,6 +1406,43 @@ define void @lshr_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: lsr z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; 
NONEON-NOSVE-NEXT: lsr w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: lsr w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsr w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsr w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsr w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = lshr <8 x i32> %op1, %op2 @@ -389,6 +1459,18 @@ define <1 x i64> @lshr_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: lsr x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = lshr <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -402,6 +1484,20 @@ define <2 x i64> @lshr_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
lshr_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: lsr x11, x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: lsr x8, x9, x8 +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = lshr <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -417,6 +1513,31 @@ define void @lshr_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #56] +; NONEON-NOSVE-NEXT: lsr x11, x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: lsr x8, x9, x8 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: lsr x11, x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: lsr x8, x9, x8 +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = lshr <4 x i64> %op1, %op2 @@ -438,6 +1559,22 @@ define <2 x i8> @shl_v2i8(<2 x i8> %op1, <2 x i8> %op2) { ; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v2i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub 
sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #20] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl w11, w10, w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = shl <2 x i8> %op1, %op2 ret <2 x i8> %res } @@ -452,6 +1589,31 @@ define <4 x i8> @shl_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl w11, w12, w11 +; NONEON-NOSVE-NEXT: strh w11, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #12] +; NONEON-NOSVE-NEXT: lsl w10, w11, w10 +; NONEON-NOSVE-NEXT: strh w10, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #10] +; NONEON-NOSVE-NEXT: lsl w9, w10, w9 +; NONEON-NOSVE-NEXT: strh w9, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = shl <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -465,6 +1627,47 @@ define <8 x i8> @shl_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v8i8: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = shl <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -478,6 +1681,78 @@ define <16 x i8> @shl_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] 
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = shl <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -493,6 +1768,147 @@ define void @shl_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: lsl z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #47] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #62] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #60] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #59] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb 
w9, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #58] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #56] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #54] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #52] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #50] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #48] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: 
ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, 
[sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = shl <32 x i8> %op1, %op2 @@ -509,6 +1925,31 @@ define <4 x i16> @shl_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = shl <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -522,6 +1963,46 @@ define <8 x i16> @shl_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v8i16: +; NONEON-NOSVE: // 
%bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = shl <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -537,6 +2018,83 @@ define void @shl_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: lsl z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; 
NONEON-NOSVE-NEXT: ldrh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #46] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; 
NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = shl <16 x i16> %op1, %op2 @@ -553,6 +2111,21 @@ define <2 x i32> @shl_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsl w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = shl <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -566,6 +2139,26 @@ define <4 x i32> @shl_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsl w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsl w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = shl <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -581,6 +2174,43 @@ define void @shl_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; NONEON-NOSVE-NEXT: lsl w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: lsl w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: lsl w11, w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: ldp w9, w10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: lsl w11, 
w10, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = shl <8 x i32> %op1, %op2 @@ -597,6 +2227,18 @@ define <1 x i64> @shl_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: lsl x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = shl <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -610,6 +2252,20 @@ define <2 x i64> @shl_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: lsl x11, x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl x8, x9, x8 +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %res = shl <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -625,6 +2281,31 @@ define void @shl_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #56] +; NONEON-NOSVE-NEXT: lsl x11, x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: lsl x8, x9, x8 +; NONEON-NOSVE-NEXT: ldp x9, x10, [sp] +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #80] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: lsl x11, x10, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: lsl x8, x9, x8 +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = shl <4 x i64> %op1, %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll index 27dbfc9a23a8d..fd2d9a8fb80d1 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll @@ -1,5 +1,6 @@ 
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -15,6 +16,30 @@ define <4 x half> @ucvtf_v4i16_v4f16(<4 x i16> %op1) { ; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v4i16_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = uitofp <4 x i16> %op1 to <4 x half> ret <4 x half> %res } @@ -27,6 +52,48 @@ define void @ucvtf_v8i16_v8f16(ptr %a, ptr %b) { ; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v8i16_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #18] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %res = uitofp <8 x i16> %op1 to <8 x half> store <8 x half> %res, ptr %b @@ -42,6 +109,80 @@ define void @ucvtf_v16i16_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: ucvtf z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v16i16_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #58] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #54] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #52] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #50] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; 
NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = uitofp <16 x i16> %op1 to <16 x half> store <16 x half> %res, ptr %b @@ -61,6 +202,19 @@ define <2 x float> @ucvtf_v2i16_v2f32(<2 x i16> %op1) { ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v2i16_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] +; NONEON-NOSVE-NEXT: ucvtf s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = uitofp <2 x i16> %op1 to <2 x float> ret <2 x float> %res } @@ -74,6 +228,25 @@ define <4 x float> @ucvtf_v4i16_v4f32(<4 x i16> %op1) { ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v4i16_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: ucvtf s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr h0, [sp, 
#10] +; NONEON-NOSVE-NEXT: ucvtf s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = uitofp <4 x i16> %op1 to <4 x float> ret <4 x float> %res } @@ -90,6 +263,38 @@ define void @ucvtf_v8i16_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v8i16_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ucvtf s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ucvtf s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ucvtf s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: ucvtf s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %res = uitofp <8 x i16> %op1 to <8 x float> store <8 x float> %res, ptr %b @@ -114,6 +319,62 @@ define void @ucvtf_v16i16_v16f32(ptr %a, ptr %b) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v16i16_v16f32: +; 
NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-128]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #46] +; NONEON-NOSVE-NEXT: ucvtf s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #44] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #88] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #42] +; NONEON-NOSVE-NEXT: ucvtf s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #40] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #80] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #38] +; NONEON-NOSVE-NEXT: ucvtf s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #36] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #72] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #34] +; NONEON-NOSVE-NEXT: ucvtf s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #32] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] +; NONEON-NOSVE-NEXT: ucvtf s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #120] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: ucvtf s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #112] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: ucvtf s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #104] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: ucvtf s1, s0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q2, 
q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = uitofp <16 x i16> %op1 to <16 x float> store <16 x float> %res, ptr %b @@ -132,6 +393,17 @@ define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) { ; CHECK-NEXT: and w8, w8, #0xffff ; CHECK-NEXT: ucvtf d0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v1i16_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: str d0, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp], #16 +; NONEON-NOSVE-NEXT: ret %res = uitofp <1 x i16> %op1 to <1 x double> ret <1 x double> %res } @@ -146,6 +418,20 @@ define <2 x double> @ucvtf_v2i16_v2f64(<2 x i16> %op1) { ; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v2i16_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = uitofp <2 x i16> %op1 to <2 x double> ret <2 x double> %res } @@ -163,6 +449,35 @@ define void @ucvtf_v4i16_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v4i16_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; 
NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #44] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #40] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #36] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #32] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i16>, ptr %a %res = uitofp <4 x i16> %op1 to <4 x double> store <4 x double> %res, ptr %b @@ -191,6 +506,57 @@ define void @ucvtf_v8i16_v8f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q3, [x1] ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v8i16_v8f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #160 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #18] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; 
NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #92] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #88] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #144] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #84] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #80] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #128] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #76] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #72] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #68] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #64] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %res = uitofp <8 x i16> %op1 to <8 x double> store <8 x double> %res, ptr %b @@ -239,6 +605,103 @@ define void @ucvtf_v16i16_v16f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q2, [x1, #32] ; CHECK-NEXT: stp q3, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v16i16_v16f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #336 +; NONEON-NOSVE-NEXT: str x29, [sp, #320] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 336 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ldr x29, [sp, #320] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: stp q0, q1, [sp] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #40] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; 
NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #56] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #88] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #44] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #64] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #152] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #72] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #68] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #136] +; NONEON-NOSVE-NEXT: ldp d2, d1, [sp, #120] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] +; NONEON-NOSVE-NEXT: str d1, [sp, #328] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #104] +; NONEON-NOSVE-NEXT: str d0, [sp, #168] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #164] +; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #176] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #160] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #240] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #156] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #152] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #224] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #148] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #144] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #140] 
+; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #136] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #192] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #332] +; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #192] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #328] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #304] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #188] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #184] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #288] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #180] +; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #288] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #176] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #272] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #172] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #168] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #256] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #224] +; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #256] +; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32] +; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #336 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = uitofp <16 x i16> %op1 to <16 x double> store <16 x double> %res, ptr %b @@ -258,6 +721,22 @@ define <2 x half> @ucvtf_v2i32_v2f16(<2 x i32> %op1) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v2i32_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] +; NONEON-NOSVE-NEXT: str wzr, [sp, #12] +; NONEON-NOSVE-NEXT: ucvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = uitofp <2 x i32> %op1 to <2 x half> ret <2 x half> %res } @@ -271,6 +750,28 @@ define <4 x half> @ucvtf_v4i32_v4f16(<4 x i32> %op1) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v4i32_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ucvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ucvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = uitofp <4 x i32> %op1 to <4 x half> ret <4 x half> %res } @@ -288,6 +789,43 @@ define <8 x half> @ucvtf_v8i32_v8f16(ptr %a) { ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v8i32_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ucvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ucvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ucvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ucvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = uitofp <8 x i32> %op1 to <8 x half> ret <8 x half> %res @@ -312,6 +850,76 @@ define void @ucvtf_v16i32_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h ; CHECK-NEXT: stp q2, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v16i32_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0, #32] +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: str q2, [sp, #48] +; NONEON-NOSVE-NEXT: ucvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #78] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; 
NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #76] +; NONEON-NOSVE-NEXT: ucvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #74] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #72] +; NONEON-NOSVE-NEXT: ucvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #70] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #68] +; NONEON-NOSVE-NEXT: ucvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #66] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #64] +; NONEON-NOSVE-NEXT: ucvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #94] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #92] +; NONEON-NOSVE-NEXT: ucvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #90] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #88] +; NONEON-NOSVE-NEXT: ucvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #86] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #84] +; NONEON-NOSVE-NEXT: ucvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #82] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; 
NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i32>, ptr %a %res = uitofp <16 x i32> %op1 to <16 x half> store <16 x half> %res, ptr %b @@ -330,6 +938,18 @@ define <2 x float> @ucvtf_v2i32_v2f32(<2 x i32> %op1) { ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v2i32_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] +; NONEON-NOSVE-NEXT: ucvtf s1, w9 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = uitofp <2 x i32> %op1 to <2 x float> ret <2 x float> %res } @@ -342,6 +962,22 @@ define <4 x float> @ucvtf_v4i32_v4f32(<4 x i32> %op1) { ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v4i32_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ucvtf s1, w9 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ucvtf s1, w9 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = uitofp <4 x i32> %op1 to <4 x float> ret <4 x float> %res } @@ -355,6 +991,32 @@ define void @ucvtf_v8i32_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: ucvtf z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v8i32_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ucvtf s1, w9 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: ucvtf s1, w9 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: ucvtf s1, w9 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: ucvtf s1, w9 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = uitofp <8 x i32> %op1 to <8 x float> store <8 x float> %res, ptr %b @@ -374,6 +1036,20 @@ define <2 x double> @ucvtf_v2i32_v2f64(<2 x i32> %op1) { ; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v2i32_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = uitofp <2 x i32> %op1 to <2 x double> ret <2 x double> %res } @@ -390,6 +1066,28 @@ define void @ucvtf_v4i32_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v4i32_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i32>, ptr %a %res = uitofp <4 x i32> %op1 to <4 x double> store <4 x double> %res, ptr %b @@ -414,6 +1112,42 @@ define void @ucvtf_v8i32_v8f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v8i32_v8f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-128]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #44] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #40] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #36] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #32] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #60] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #56] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #52] +; NONEON-NOSVE-NEXT: ucvtf d1, d0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #48] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = uitofp <8 x i32> %op1 to <8 x double> store <8 x double> %res, ptr %b @@ -440,6 +1174,21 @@ define <2 x half> @ucvtf_v2i64_v2f16(<2 x i64> %op1) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v2i64_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp] +; NONEON-NOSVE-NEXT: ucvtf s0, x9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ucvtf s0, x8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = uitofp <2 x i64> %op1 to <2 x half> ret <2 x half> %res } @@ -460,6 +1209,29 @@ define <4 x half> @ucvtf_v4i64_v4f16(ptr %a) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v4i64_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: ucvtf s0, x9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ucvtf s0, x8 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ucvtf s0, x9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ucvtf s0, x8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %res = uitofp <4 x i64> %op1 to <4 x half> ret <4 x half> %res @@ -493,6 +1265,47 @@ define <8 x half> @ucvtf_v8i64_v8f16(ptr %a) { ; CHECK-NEXT: splice z0.h, p0, z0.h, z2.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v8i64_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; 
NONEON-NOSVE-NEXT: str q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #16] +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: str q2, [sp] +; NONEON-NOSVE-NEXT: ucvtf s0, x9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #78] +; NONEON-NOSVE-NEXT: ucvtf s0, x8 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #76] +; NONEON-NOSVE-NEXT: ucvtf s0, x9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #74] +; NONEON-NOSVE-NEXT: ucvtf s0, x8 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #72] +; NONEON-NOSVE-NEXT: ucvtf s0, x9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #70] +; NONEON-NOSVE-NEXT: ucvtf s0, x8 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #68] +; NONEON-NOSVE-NEXT: ucvtf s0, x9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #66] +; NONEON-NOSVE-NEXT: ucvtf s0, x8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #64] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #64] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i64>, ptr %a %res = uitofp <8 x i64> %op1 to <8 x half> ret <8 x half> %res @@ -511,6 +1324,18 @@ define <2 x float> @ucvtf_v2i64_v2f32(<2 x i64> %op1) { ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v2i64_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp] +; NONEON-NOSVE-NEXT: ucvtf s1, x9 +; NONEON-NOSVE-NEXT: ucvtf s0, x8 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = uitofp <2 x i64> %op1 to <2 x float> ret <2 x float> %res } @@ -528,6 +1353,23 @@ define <4 x float> @ucvtf_v4i64_v4f32(ptr %a) { ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v4i64_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: ucvtf s1, x9 +; NONEON-NOSVE-NEXT: ucvtf s0, x8 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: ucvtf s1, x9 +; NONEON-NOSVE-NEXT: ucvtf s0, x8 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %res = uitofp <4 x i64> %op1 to <4 x float> ret <4 x float> %res @@ -552,6 +1394,36 @@ define void @ucvtf_v8i64_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s ; CHECK-NEXT: stp q2, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v8i64_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0, #32] +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: str q2, [sp, #48] +; NONEON-NOSVE-NEXT: ucvtf s1, x9 +; NONEON-NOSVE-NEXT: ucvtf s0, x8 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #72] +; 
NONEON-NOSVE-NEXT: ucvtf s1, x9 +; NONEON-NOSVE-NEXT: ucvtf s0, x8 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #64] +; NONEON-NOSVE-NEXT: ucvtf s1, x9 +; NONEON-NOSVE-NEXT: ucvtf s0, x8 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #88] +; NONEON-NOSVE-NEXT: ucvtf s1, x9 +; NONEON-NOSVE-NEXT: ucvtf s0, x8 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i64>, ptr %a %res = uitofp <8 x i64> %op1 to <8 x float> store <8 x float> %res, ptr %b @@ -570,6 +1442,18 @@ define <2 x double> @ucvtf_v2i64_v2f64(<2 x i64> %op1) { ; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v2i64_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp] +; NONEON-NOSVE-NEXT: ucvtf d1, x9 +; NONEON-NOSVE-NEXT: ucvtf d0, x8 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = uitofp <2 x i64> %op1 to <2 x double> ret <2 x double> %res } @@ -583,6 +1467,24 @@ define void @ucvtf_v4i64_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: ucvtf z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v4i64_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: ucvtf d1, x9 +; NONEON-NOSVE-NEXT: ucvtf d0, x8 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ucvtf d1, x9 +; NONEON-NOSVE-NEXT: ucvtf d0, x8 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %res = uitofp <4 x i64> %op1 to <4 x double> store <4 x double> %res, ptr %b @@ -601,6 +1503,30 @@ define <4 x half> @scvtf_v4i16_v4f16(<4 x i16> %op1) { ; CHECK-NEXT: scvtf z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v4i16_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #6] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = sitofp <4 x i16> %op1 to <4 x half> ret <4 x half> %res } @@ -613,6 +1539,48 @@ define void @scvtf_v8i16_v8f16(ptr %a, ptr %b) { ; CHECK-NEXT: scvtf z0.h, p0/m, z0.h ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v8i16_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: 
str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #14] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #22] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #20] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #18] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %res = sitofp <8 x i16> %op1 to <8 x half> store <8 x half> %res, ptr %b @@ -628,6 +1596,80 @@ define void @scvtf_v16i16_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: scvtf z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v16i16_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #62] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #60] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #58] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #56] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #54] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #52] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #50] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #48] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt 
h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = sitofp <16 x i16> %op1 to <16 x half> store <16 x half> %res, ptr %b @@ -646,6 +1688,19 @@ define <2 x float> @scvtf_v2i16_v2f32(<2 x i16> %op1) { ; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v2i16_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #4] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: scvtf s1, w9 +; NONEON-NOSVE-NEXT: stp s1, s0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = sitofp <2 x i16> %op1 to <2 x float> ret <2 x float> %res } @@ -659,6 +1714,25 @@ define <4 x float> @scvtf_v4i16_v4f32(<4 x i16> %op1) { ; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v4i16_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #14] +; NONEON-NOSVE-NEXT: scvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #10] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: scvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #8] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sitofp <4 x i16> %op1 to <4 x float> ret <4 x float> %res } @@ -675,6 +1749,38 @@ define void @scvtf_v8i16_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v8i16_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30] +; NONEON-NOSVE-NEXT: scvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: scvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: scvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: scvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %res = sitofp <8 x i16> %op1 to <8 x float> store <8 x float> %res, ptr %b @@ -699,6 +1805,62 @@ define void @scvtf_v16i16_v16f32(ptr %a, ptr %b) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v16i16_v16f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-128]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #46] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: scvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #44] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #42] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #88] +; NONEON-NOSVE-NEXT: scvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #40] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #38] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #80] +; NONEON-NOSVE-NEXT: scvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #36] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #34] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #72] +; NONEON-NOSVE-NEXT: scvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #32] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #62] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #64] +; NONEON-NOSVE-NEXT: scvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #58] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #120] +; NONEON-NOSVE-NEXT: scvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #56] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #54] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #112] +; NONEON-NOSVE-NEXT: scvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #52] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #50] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #104] +; NONEON-NOSVE-NEXT: scvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #48] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: 
add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = sitofp <16 x i16> %op1 to <16 x float> store <16 x float> %res, ptr %b @@ -720,6 +1882,20 @@ define <2 x double> @scvtf_v2i16_v2f64(<2 x i16> %op1) { ; CHECK-NEXT: scvtf z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v2i16_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sitofp <2 x i16> %op1 to <2 x double> ret <2 x double> %res } @@ -737,6 +1913,33 @@ define void @scvtf_v4i16_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: scvtf z0.d, p0/m, z0.d ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v4i16_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] +; 
NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i16>, ptr %a %res = sitofp <4 x i16> %op1 to <4 x double> store <4 x double> %res, ptr %b @@ -765,6 +1968,53 @@ define void @scvtf_v8i16_v8f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q3, [x1] ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v8i16_v8f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #160 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #18] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #80] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #144] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #72] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #128] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; 
NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %res = sitofp <8 x i16> %op1 to <8 x double> store <8 x double> %res, ptr %b @@ -813,6 +2063,96 @@ define void @scvtf_v16i16_v16f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q2, [x1, #32] ; CHECK-NEXT: stp q3, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v16i16_v16f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #336 +; NONEON-NOSVE-NEXT: str x29, [sp, #320] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 336 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ldr x29, [sp, #320] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: stp q0, q1, [sp] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #40] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #50] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #54] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #52] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #56] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #88] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #44] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #66] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #64] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #152] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #72] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #70] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #68] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] +; 
NONEON-NOSVE-NEXT: ldrsh w9, [sp, #58] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #56] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #136] +; NONEON-NOSVE-NEXT: ldp d2, d1, [sp, #120] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #60] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] +; NONEON-NOSVE-NEXT: str d1, [sp, #328] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #104] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #160] +; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #176] +; NONEON-NOSVE-NEXT: str d0, [sp, #168] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #152] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #240] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #144] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #224] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #136] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #332] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #192] +; NONEON-NOSVE-NEXT: scvtf d1, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #328] +; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #192] +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #184] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #304] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #176] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #288] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #168] +; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #288] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #272] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #256] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #224] +; NONEON-NOSVE-NEXT: ldp q2, 
q5, [sp, #256] +; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #32] +; NONEON-NOSVE-NEXT: stp q6, q7, [x1, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: stp q5, q2, [x1, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #336 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = sitofp <16 x i16> %op1 to <16 x double> store <16 x double> %res, ptr %b @@ -832,6 +2172,22 @@ define <2 x half> @scvtf_v2i32_v2f16(<2 x i32> %op1) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v2i32_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] +; NONEON-NOSVE-NEXT: str wzr, [sp, #12] +; NONEON-NOSVE-NEXT: scvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = sitofp <2 x i32> %op1 to <2 x half> ret <2 x half> %res } @@ -845,6 +2201,28 @@ define <4 x half> @scvtf_v4i32_v4f16(<4 x i32> %op1) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v4i32_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: scvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: scvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sitofp <4 x i32> %op1 to <4 x half> ret <4 x half> %res } @@ -862,6 +2240,43 @@ define <8 x half> @scvtf_v8i32_v8f16(ptr %a) { ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v8i32_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: scvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: scvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: scvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: scvtf s0, w9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = sitofp <8 x i32> %op1 to <8 x half> ret <8 x half> %res @@ -879,6 +2294,18 @@ define <2 x float> @scvtf_v2i32_v2f32(<2 x i32> %op1) { ; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v2i32_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] +; NONEON-NOSVE-NEXT: scvtf s1, w9 +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = sitofp <2 x i32> %op1 to <2 x float> ret <2 x float> %res } @@ -891,6 +2318,22 @@ define <4 x float> @scvtf_v4i32_v4f32(<4 x i32> %op1) { ; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v4i32_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: scvtf s1, w9 +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: scvtf s1, w9 +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sitofp <4 x i32> %op1 to <4 x float> ret <4 x float> %res } @@ -904,6 +2347,32 @@ define void @scvtf_v8i32_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: scvtf z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v8i32_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: scvtf s1, w9 +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: scvtf s1, w9 +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: scvtf s1, w9 +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: scvtf s1, w9 +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = sitofp <8 x i32> %op1 to <8 x float> store <8 x float> %res, ptr %b @@ -923,6 +2392,19 @@ define <2 x double> @scvtf_v2i32_v2f64(<2 x i32> %op1) { ; CHECK-NEXT: scvtf z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v2i32_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sitofp <2 x i32> %op1 to <2 x double> ret <2 x double> %res } @@ -939,6 +2421,26 @@ define void @scvtf_v4i32_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: scvtf z0.d, p0/m, z0.d ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v4i32_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i32>, ptr %a %res = sitofp <4 x i32> %op1 to <4 x double> store <4 x double> %res, ptr %b @@ -963,6 +2465,38 @@ define void @scvtf_v8i32_v8f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v8i32_v8f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-128]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] +; NONEON-NOSVE-NEXT: stp q2, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = sitofp <8 x i32> %op1 to <8 x double> store <8 x double> %res, ptr %b @@ -1007,6 +2541,72 @@ define void @scvtf_v16i32_v16f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q2, q1, [x1] ; CHECK-NEXT: stp q4, q0, [x1, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v16i32_v16f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #272 +; NONEON-NOSVE-NEXT: str x29, [sp, #256] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 272 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ldr x29, [sp, #256] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32] +; NONEON-NOSVE-NEXT: str q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q2, [sp] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: str q3, [sp, #64] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #48] +; NONEON-NOSVE-NEXT: ldp d0, 
d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #104] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: str d0, [sp, #264] +; NONEON-NOSVE-NEXT: ldp d0, d2, [sp, #16] +; NONEON-NOSVE-NEXT: stp d2, d0, [sp, #88] +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: scvtf d2, w9 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #120] +; NONEON-NOSVE-NEXT: scvtf d0, w9 +; NONEON-NOSVE-NEXT: str d0, [sp, #152] +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #104] +; NONEON-NOSVE-NEXT: stp d2, d0, [sp, #136] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #112] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #192] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #268] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #224] +; NONEON-NOSVE-NEXT: scvtf d1, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #264] +; NONEON-NOSVE-NEXT: ldp q4, q6, [sp, #208] +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #240] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #96] +; NONEON-NOSVE-NEXT: ldr q7, [sp, #240] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #160] +; NONEON-NOSVE-NEXT: scvtf d1, w9 +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ldr q5, [sp, #160] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #176] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q2, q3, [sp, #176] +; NONEON-NOSVE-NEXT: stp q7, q6, [x1, #64] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: stp q4, q3, [x1, #32] +; NONEON-NOSVE-NEXT: stp q2, q5, [x1, #96] +; NONEON-NOSVE-NEXT: add 
sp, sp, #272 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i32>, ptr %a %res = sitofp <16 x i32> %op1 to <16 x double> store <16 x double> %res, ptr %b @@ -1033,6 +2633,21 @@ define <2 x half> @scvtf_v2i64_v2f16(<2 x i64> %op1) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v2i64_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp] +; NONEON-NOSVE-NEXT: scvtf s0, x9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: scvtf s0, x8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sitofp <2 x i64> %op1 to <2 x half> ret <2 x half> %res } @@ -1053,6 +2668,29 @@ define <4 x half> @scvtf_v4i64_v4f16(ptr %a) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v4i64_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: scvtf s0, x9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: scvtf s0, x8 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: scvtf s0, x9 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: scvtf s0, x8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %res = sitofp <4 x i64> %op1 to <4 x half> ret <4 x half> %res @@ -1071,6 +2709,18 @@ define <2 x float> @scvtf_v2i64_v2f32(<2 x i64> %op1) { ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v2i64_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp] +; NONEON-NOSVE-NEXT: scvtf s1, x9 +; NONEON-NOSVE-NEXT: scvtf s0, x8 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sitofp <2 x i64> %op1 to <2 x float> ret <2 x float> %res } @@ -1088,6 +2738,23 @@ define <4 x float> @scvtf_v4i64_v4f32(ptr %a) { ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v4i64_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: scvtf s1, x9 +; NONEON-NOSVE-NEXT: scvtf s0, x8 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] +; NONEON-NOSVE-NEXT: scvtf s1, x9 +; NONEON-NOSVE-NEXT: scvtf s0, x8 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %res = sitofp <4 x i64> %op1 to <4 x float> ret <4 x float> %res @@ -1105,6 +2772,18 @@ define <2 x double> @scvtf_v2i64_v2f64(<2 x i64> %op1) { ; CHECK-NEXT: scvtf z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v2i64_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp] +; NONEON-NOSVE-NEXT: scvtf d1, x9 +; NONEON-NOSVE-NEXT: scvtf d0, x8 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sitofp <2 x i64> %op1 to <2 x double> ret <2 x double> %res } @@ -1118,6 +2797,24 @@ define void @scvtf_v4i64_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: scvtf z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v4i64_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: scvtf d1, x9 +; NONEON-NOSVE-NEXT: scvtf d0, x8 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] +; NONEON-NOSVE-NEXT: scvtf d1, x9 +; NONEON-NOSVE-NEXT: scvtf d0, x8 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %res = sitofp <4 x i64> %op1 to <4 x double> store <4 x double> %res, ptr %b @@ -1130,6 +2827,13 @@ define half @scvtf_i16_f16(ptr %0) { ; CHECK-NEXT: ldrsh w8, [x0] ; CHECK-NEXT: scvtf h0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_i16_f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldrsh w8, [x0] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %2 = load i16, ptr %0, align 64 %3 = sitofp i16 %2 to half ret half %3 @@ -1141,6 +2845,12 @@ define float @scvtf_i16_f32(ptr %0) { ; CHECK-NEXT: ldrsh w8, [x0] ; CHECK-NEXT: scvtf s0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_i16_f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldrsh w8, [x0] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ret %2 = load i16, ptr %0, align 64 %3 = sitofp i16 %2 to float ret float %3 @@ -1152,6 +2862,12 @@ define double @scvtf_i16_f64(ptr %0) { ; CHECK-NEXT: ldrsh w8, [x0] ; CHECK-NEXT: scvtf d0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_i16_f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldrsh w8, [x0] +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ret %2 = load i16, ptr %0, align 64 %3 = sitofp i16 %2 to double ret double %3 @@ -1163,6 +2879,13 @@ define half @scvtf_i32_f16(ptr %0) { ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: scvtf h0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_i32_f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr w8, 
[x0] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %2 = load i32, ptr %0, align 64 %3 = sitofp i32 %2 to half ret half %3 @@ -1174,6 +2897,12 @@ define float @scvtf_i32_f32(ptr %0) { ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: scvtf s0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_i32_f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ret %2 = load i32, ptr %0, align 64 %3 = sitofp i32 %2 to float ret float %3 @@ -1185,6 +2914,12 @@ define double @scvtf_i32_f64(ptr %0) { ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: scvtf d0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_i32_f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ret %2 = load i32, ptr %0, align 64 %3 = sitofp i32 %2 to double ret double %3 @@ -1196,6 +2931,13 @@ define half @scvtf_i64_f16(ptr %0) { ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: scvtf h0, x8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_i64_f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr x8, [x0] +; NONEON-NOSVE-NEXT: scvtf s0, x8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %2 = load i64, ptr %0, align 64 %3 = sitofp i64 %2 to half ret half %3 @@ -1207,6 +2949,12 @@ define float @scvtf_i64_f32(ptr %0) { ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: scvtf s0, x8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_i64_f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr x8, [x0] +; NONEON-NOSVE-NEXT: scvtf s0, x8 +; NONEON-NOSVE-NEXT: ret %2 = load i64, ptr %0, align 64 %3 = sitofp i64 %2 to float ret float %3 @@ -1218,6 +2966,12 @@ define double @scvtf_i64_f64(ptr %0) { ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: scvtf d0, x8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_i64_f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr x8, [x0] +; NONEON-NOSVE-NEXT: scvtf d0, x8 +; NONEON-NOSVE-NEXT: ret %2 = load i64, 
ptr %0, align 64 %3 = sitofp i64 %2 to double ret double %3 @@ -1229,6 +2983,13 @@ define half @ucvtf_i16_f16(ptr %0) { ; CHECK-NEXT: ldrh w8, [x0] ; CHECK-NEXT: ucvtf h0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_i16_f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr h0, [x0] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %2 = load i16, ptr %0, align 64 %3 = uitofp i16 %2 to half ret half %3 @@ -1240,6 +3001,12 @@ define float @ucvtf_i16_f32(ptr %0) { ; CHECK-NEXT: ldr h0, [x0] ; CHECK-NEXT: ucvtf s0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_i16_f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr h0, [x0] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ret %2 = load i16, ptr %0, align 64 %3 = uitofp i16 %2 to float ret float %3 @@ -1251,6 +3018,12 @@ define double @ucvtf_i16_f64(ptr %0) { ; CHECK-NEXT: ldr h0, [x0] ; CHECK-NEXT: ucvtf d0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_i16_f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr h0, [x0] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ret %2 = load i16, ptr %0, align 64 %3 = uitofp i16 %2 to double ret double %3 @@ -1262,6 +3035,13 @@ define half @ucvtf_i32_f16(ptr %0) { ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: ucvtf h0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_i32_f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %2 = load i32, ptr %0, align 64 %3 = uitofp i32 %2 to half ret half %3 @@ -1273,6 +3053,12 @@ define float @ucvtf_i32_f32(ptr %0) { ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: ucvtf s0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_i32_f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ret %2 = load i32, ptr %0, align 64 %3 = uitofp i32 %2 to float ret float %3 @@ -1284,6 +3070,12 @@ 
define double @ucvtf_i32_f64(ptr %0) { ; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ucvtf d0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_i32_f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr s0, [x0] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ret %2 = load i32, ptr %0, align 64 %3 = uitofp i32 %2 to double ret double %3 @@ -1295,6 +3087,13 @@ define half @ucvtf_i64_f16(ptr %0) { ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: ucvtf h0, x8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_i64_f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr x8, [x0] +; NONEON-NOSVE-NEXT: ucvtf s0, x8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %2 = load i64, ptr %0, align 64 %3 = uitofp i64 %2 to half ret half %3 @@ -1306,6 +3105,12 @@ define float @ucvtf_i64_f32(ptr %0) { ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: ucvtf s0, x8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_i64_f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr x8, [x0] +; NONEON-NOSVE-NEXT: ucvtf s0, x8 +; NONEON-NOSVE-NEXT: ret %2 = load i64, ptr %0, align 64 %3 = uitofp i64 %2 to float ret float %3 @@ -1317,6 +3122,12 @@ define double @ucvtf_i64_f64(ptr %0) { ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: ucvtf d0, x8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_i64_f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr x8, [x0] +; NONEON-NOSVE-NEXT: ucvtf d0, x8 +; NONEON-NOSVE-NEXT: ret %2 = load i64, ptr %0, align 64 %3 = uitofp i64 %2 to double ret double %3 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll index 3775a64a89a0c..af15d5f67ad15 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible 
< %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -18,6 +19,44 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, <4 x i1> %mask) { ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #22] +; NONEON-NOSVE-NEXT: str d0, [sp] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #18] +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w11, w11, #0, #1 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: tst w9, #0xffff +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: csel w9, w13, w12, ne +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #4] +; NONEON-NOSVE-NEXT: tst w11, #0xffff +; NONEON-NOSVE-NEXT: strh w9, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #2] +; NONEON-NOSVE-NEXT: csel w9, w12, w9, ne +; NONEON-NOSVE-NEXT: tst w10, #0xffff +; NONEON-NOSVE-NEXT: ldrh w10, [sp] +; NONEON-NOSVE-NEXT: strh w9, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: csel w9, w11, w9, ne +; NONEON-NOSVE-NEXT: tst w8, #0xffff +; NONEON-NOSVE-NEXT: strh w9, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: csel w8, w10, w9, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %sel = select <4 x i1> %mask, <4 x i8> %op1, <4 x i8> %op2 ret <4 x i8> %sel } @@ 
-36,6 +75,72 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) { ; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #23] +; NONEON-NOSVE-NEXT: str d0, [sp] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #15] +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #7] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #21] +; NONEON-NOSVE-NEXT: sbfx w13, w13, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w15, w15, #0, #1 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #20] +; NONEON-NOSVE-NEXT: sbfx w14, w14, #0, #1 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #18] +; NONEON-NOSVE-NEXT: tst w13, #0xff +; NONEON-NOSVE-NEXT: sbfx w12, w12, #0, #1 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #17] +; NONEON-NOSVE-NEXT: csel w13, w17, w16, ne +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #6] +; NONEON-NOSVE-NEXT: tst w15, #0xff +; NONEON-NOSVE-NEXT: strb w13, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #5] +; NONEON-NOSVE-NEXT: sbfx w11, w11, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: csel w13, w16, w13, ne +; NONEON-NOSVE-NEXT: tst w14, #0xff +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #4] +; NONEON-NOSVE-NEXT: strb w13, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #13] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: csel w13, w15, w13, ne +; NONEON-NOSVE-NEXT: tst w12, #0xff +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: strb w13, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #12] +; NONEON-NOSVE-NEXT: csel w12, w14, w13, ne +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #3] +; NONEON-NOSVE-NEXT: tst w11, #0xff +; NONEON-NOSVE-NEXT: 
strb w12, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #11] +; NONEON-NOSVE-NEXT: csel w11, w13, w12, ne +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #2] +; NONEON-NOSVE-NEXT: tst w10, #0xff +; NONEON-NOSVE-NEXT: strb w11, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #10] +; NONEON-NOSVE-NEXT: csel w10, w12, w11, ne +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #1] +; NONEON-NOSVE-NEXT: tst w9, #0xff +; NONEON-NOSVE-NEXT: strb w10, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #9] +; NONEON-NOSVE-NEXT: csel w9, w11, w10, ne +; NONEON-NOSVE-NEXT: ldrb w10, [sp] +; NONEON-NOSVE-NEXT: tst w8, #0xff +; NONEON-NOSVE-NEXT: strb w9, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: csel w8, w10, w9, ne +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %sel = select <8 x i1> %mask, <8 x i8> %op1, <8 x i8> %op2 ret <8 x i8> %sel } @@ -54,6 +159,128 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask) ; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: stp q1, q2, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #47] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldrb w4, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w5, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w6, [sp, #15] +; NONEON-NOSVE-NEXT: ldrb w3, [sp, #45] +; NONEON-NOSVE-NEXT: sbfx w2, w2, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w4, w4, #0, #1 +; NONEON-NOSVE-NEXT: ldrb w1, [sp, #44] +; NONEON-NOSVE-NEXT: sbfx w3, w3, #0, #1 +; NONEON-NOSVE-NEXT: ldrb w0, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w18, [sp, #42] +; NONEON-NOSVE-NEXT: tst w2, #0xff +; NONEON-NOSVE-NEXT: sbfx w1, w1, #0, #1 +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #41] +; NONEON-NOSVE-NEXT: csel w2, w6, w5, ne +; 
NONEON-NOSVE-NEXT: ldrb w5, [sp, #14] +; NONEON-NOSVE-NEXT: tst w4, #0xff +; NONEON-NOSVE-NEXT: strb w2, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w4, [sp, #13] +; NONEON-NOSVE-NEXT: sbfx w0, w0, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w18, w18, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w17, w17, #0, #1 +; NONEON-NOSVE-NEXT: csel w2, w5, w2, ne +; NONEON-NOSVE-NEXT: tst w3, #0xff +; NONEON-NOSVE-NEXT: ldrb w3, [sp, #12] +; NONEON-NOSVE-NEXT: strb w2, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #37] +; NONEON-NOSVE-NEXT: csel w2, w4, w2, ne +; NONEON-NOSVE-NEXT: tst w1, #0xff +; NONEON-NOSVE-NEXT: sbfx w16, w16, #0, #1 +; NONEON-NOSVE-NEXT: strb w2, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #28] +; NONEON-NOSVE-NEXT: sbfx w15, w15, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w14, w14, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w13, w13, #0, #1 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #36] +; NONEON-NOSVE-NEXT: csel w1, w3, w2, ne +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #11] +; NONEON-NOSVE-NEXT: tst w0, #0xff +; NONEON-NOSVE-NEXT: strb w1, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w1, [sp, #27] +; NONEON-NOSVE-NEXT: sbfx w12, w12, #0, #1 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #33] +; NONEON-NOSVE-NEXT: csel w0, w2, w1, ne +; NONEON-NOSVE-NEXT: ldrb w1, [sp, #10] +; NONEON-NOSVE-NEXT: tst w18, #0xff +; NONEON-NOSVE-NEXT: strb w0, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w0, [sp, #26] +; NONEON-NOSVE-NEXT: sbfx w11, w11, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #32] +; NONEON-NOSVE-NEXT: csel w18, w1, w0, ne +; NONEON-NOSVE-NEXT: ldrb w0, [sp, #9] +; NONEON-NOSVE-NEXT: tst w17, #0xff +; NONEON-NOSVE-NEXT: strb w18, [sp, #58] +; 
NONEON-NOSVE-NEXT: ldrb w18, [sp, #25] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: csel w17, w0, w18, ne +; NONEON-NOSVE-NEXT: ldrb w18, [sp, #8] +; NONEON-NOSVE-NEXT: tst w16, #0xff +; NONEON-NOSVE-NEXT: strb w17, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #24] +; NONEON-NOSVE-NEXT: csel w16, w18, w17, ne +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #7] +; NONEON-NOSVE-NEXT: tst w15, #0xff +; NONEON-NOSVE-NEXT: strb w16, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #23] +; NONEON-NOSVE-NEXT: csel w15, w17, w16, ne +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #6] +; NONEON-NOSVE-NEXT: tst w14, #0xff +; NONEON-NOSVE-NEXT: strb w15, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #22] +; NONEON-NOSVE-NEXT: csel w14, w16, w15, ne +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #5] +; NONEON-NOSVE-NEXT: tst w13, #0xff +; NONEON-NOSVE-NEXT: strb w14, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #21] +; NONEON-NOSVE-NEXT: csel w13, w15, w14, ne +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #4] +; NONEON-NOSVE-NEXT: tst w12, #0xff +; NONEON-NOSVE-NEXT: strb w13, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #20] +; NONEON-NOSVE-NEXT: csel w12, w14, w13, ne +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #3] +; NONEON-NOSVE-NEXT: tst w11, #0xff +; NONEON-NOSVE-NEXT: strb w12, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #19] +; NONEON-NOSVE-NEXT: csel w11, w13, w12, ne +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #2] +; NONEON-NOSVE-NEXT: tst w10, #0xff +; NONEON-NOSVE-NEXT: strb w11, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #18] +; NONEON-NOSVE-NEXT: csel w10, w12, w11, ne +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #1] +; NONEON-NOSVE-NEXT: tst w9, #0xff +; NONEON-NOSVE-NEXT: strb w10, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #17] +; NONEON-NOSVE-NEXT: csel w9, w11, w10, ne +; NONEON-NOSVE-NEXT: ldrb w10, [sp] +; NONEON-NOSVE-NEXT: tst w8, #0xff +; NONEON-NOSVE-NEXT: strb w9, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #16] +; NONEON-NOSVE-NEXT: csel w8, w10, w9, ne +; NONEON-NOSVE-NEXT: 
strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %sel = select <16 x i1> %mask, <16 x i8> %op1, <16 x i8> %op2 ret <16 x i8> %sel } @@ -70,6 +297,208 @@ define void @select_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: sel z1.b, p0, z2.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #208 +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #112] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #128] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #144] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #160] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #176] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #192] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 208 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 +; NONEON-NOSVE-NEXT: .cfi_offset w28, -80 +; NONEON-NOSVE-NEXT: .cfi_offset w30, -88 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -96 +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #19] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #20] +; NONEON-NOSVE-NEXT: csel w8, w9, w8, eq +; 
NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: ldrb w18, [sp, #37] +; NONEON-NOSVE-NEXT: csel w9, w11, w10, eq +; NONEON-NOSVE-NEXT: cmp w13, w12 +; NONEON-NOSVE-NEXT: ldrb w1, [sp, #21] +; NONEON-NOSVE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: csel w8, w13, w12, eq +; NONEON-NOSVE-NEXT: cmp w16, w14 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #22] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #4] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: csel w8, w16, w14, eq +; NONEON-NOSVE-NEXT: cmp w1, w18 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #23] +; NONEON-NOSVE-NEXT: csel w12, w1, w18, eq +; NONEON-NOSVE-NEXT: cmp w2, w13 +; NONEON-NOSVE-NEXT: ldrb w18, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w1, [sp, #24] +; NONEON-NOSVE-NEXT: csel w13, w2, w13, eq +; NONEON-NOSVE-NEXT: cmp w16, w14 +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w5, [sp, #25] +; NONEON-NOSVE-NEXT: csel w14, w16, w14, eq +; NONEON-NOSVE-NEXT: cmp w1, w18 +; NONEON-NOSVE-NEXT: ldrb w6, [sp, #26] +; NONEON-NOSVE-NEXT: csel w16, w1, w18, eq +; NONEON-NOSVE-NEXT: ldrb w1, [sp, #42] +; NONEON-NOSVE-NEXT: cmp w5, w2 +; NONEON-NOSVE-NEXT: csel w18, w5, w2, eq +; NONEON-NOSVE-NEXT: ldrb w2, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w5, [sp, #27] +; NONEON-NOSVE-NEXT: cmp w6, w1 +; NONEON-NOSVE-NEXT: ldrb w19, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w29, [sp, #45] +; NONEON-NOSVE-NEXT: csel w1, w6, w1, eq +; NONEON-NOSVE-NEXT: ldrb w6, [sp, #44] +; NONEON-NOSVE-NEXT: cmp w5, w2 +; NONEON-NOSVE-NEXT: ldrb w30, [sp, #29] +; NONEON-NOSVE-NEXT: str w8, [sp] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: csel w2, w5, w2, eq +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: cmp w19, w6 +; NONEON-NOSVE-NEXT: stp q2, q3, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w21, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #31] +; NONEON-NOSVE-NEXT: csel w5, w19, w6, eq +; 
NONEON-NOSVE-NEXT: cmp w30, w29 +; NONEON-NOSVE-NEXT: ldrb w22, [sp, #64] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #48] +; NONEON-NOSVE-NEXT: csel w6, w30, w29, eq +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: ldrb w28, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w29, [sp, #49] +; NONEON-NOSVE-NEXT: csel w19, w8, w9, eq +; NONEON-NOSVE-NEXT: cmp w10, w21 +; NONEON-NOSVE-NEXT: ldrb w27, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #50] +; NONEON-NOSVE-NEXT: csel w21, w10, w21, eq +; NONEON-NOSVE-NEXT: cmp w11, w22 +; NONEON-NOSVE-NEXT: ldrb w26, [sp, #67] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #51] +; NONEON-NOSVE-NEXT: csel w22, w11, w22, eq +; NONEON-NOSVE-NEXT: cmp w29, w28 +; NONEON-NOSVE-NEXT: ldrb w25, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #52] +; NONEON-NOSVE-NEXT: csel w11, w29, w28, eq +; NONEON-NOSVE-NEXT: cmp w8, w27 +; NONEON-NOSVE-NEXT: ldrb w24, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w28, [sp, #53] +; NONEON-NOSVE-NEXT: csel w8, w8, w27, eq +; NONEON-NOSVE-NEXT: cmp w9, w26 +; NONEON-NOSVE-NEXT: ldrb w23, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w27, [sp, #54] +; NONEON-NOSVE-NEXT: csel w9, w9, w26, eq +; NONEON-NOSVE-NEXT: cmp w10, w25 +; NONEON-NOSVE-NEXT: ldrb w20, [sp, #71] +; NONEON-NOSVE-NEXT: ldrb w26, [sp, #55] +; NONEON-NOSVE-NEXT: csel w10, w10, w25, eq +; NONEON-NOSVE-NEXT: cmp w28, w24 +; NONEON-NOSVE-NEXT: ldrb w7, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w25, [sp, #56] +; NONEON-NOSVE-NEXT: csel w24, w28, w24, eq +; NONEON-NOSVE-NEXT: cmp w27, w23 +; NONEON-NOSVE-NEXT: ldrb w4, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w28, [sp, #57] +; NONEON-NOSVE-NEXT: csel w23, w27, w23, eq +; NONEON-NOSVE-NEXT: cmp w26, w20 +; NONEON-NOSVE-NEXT: ldrb w3, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w27, [sp, #58] +; NONEON-NOSVE-NEXT: csel w20, w26, w20, eq +; NONEON-NOSVE-NEXT: cmp w25, w7 +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #75] +; NONEON-NOSVE-NEXT: ldrb w26, [sp, #59] +; NONEON-NOSVE-NEXT: csel w7, w25, w7, eq +; NONEON-NOSVE-NEXT: cmp w28, w4 +; 
NONEON-NOSVE-NEXT: ldrb w15, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w25, [sp, #60] +; NONEON-NOSVE-NEXT: csel w4, w28, w4, eq +; NONEON-NOSVE-NEXT: cmp w27, w3 +; NONEON-NOSVE-NEXT: csel w3, w27, w3, eq +; NONEON-NOSVE-NEXT: cmp w26, w17 +; NONEON-NOSVE-NEXT: ldrb w28, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w27, [sp, #61] +; NONEON-NOSVE-NEXT: csel w17, w26, w17, eq +; NONEON-NOSVE-NEXT: cmp w25, w15 +; NONEON-NOSVE-NEXT: ldrb w26, [sp, #78] +; NONEON-NOSVE-NEXT: csel w15, w25, w15, eq +; NONEON-NOSVE-NEXT: ldrb w25, [sp, #62] +; NONEON-NOSVE-NEXT: cmp w27, w28 +; NONEON-NOSVE-NEXT: ldrb w29, [sp, #79] +; NONEON-NOSVE-NEXT: ldrb w30, [sp, #63] +; NONEON-NOSVE-NEXT: strb w9, [sp, #99] +; NONEON-NOSVE-NEXT: csel w27, w27, w28, eq +; NONEON-NOSVE-NEXT: cmp w25, w26 +; NONEON-NOSVE-NEXT: strb w8, [sp, #98] +; NONEON-NOSVE-NEXT: csel w25, w25, w26, eq +; NONEON-NOSVE-NEXT: cmp w30, w29 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: csel w26, w30, w29, eq +; NONEON-NOSVE-NEXT: ldrb w28, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w29, [sp, #16] +; NONEON-NOSVE-NEXT: strb w26, [sp, #111] +; NONEON-NOSVE-NEXT: strb w9, [sp, #84] +; NONEON-NOSVE-NEXT: cmp w29, w28 +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w25, [sp, #110] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #144] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w27, [sp, #109] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: csel w8, w29, w28, eq +; NONEON-NOSVE-NEXT: strb w15, [sp, #108] +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #128] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w17, [sp, #107] +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #112] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w3, [sp, #106] +; NONEON-NOSVE-NEXT: strb w4, [sp, #105] +; NONEON-NOSVE-NEXT: strb w7, [sp, #104] +; NONEON-NOSVE-NEXT: strb w20, [sp, #103] +; NONEON-NOSVE-NEXT: strb w23, [sp, #102] +; 
NONEON-NOSVE-NEXT: strb w24, [sp, #101] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #160] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w10, [sp, #100] +; NONEON-NOSVE-NEXT: strb w11, [sp, #97] +; NONEON-NOSVE-NEXT: strb w22, [sp, #96] +; NONEON-NOSVE-NEXT: strb w21, [sp, #95] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #176] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w19, [sp, #94] +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #192] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w6, [sp, #93] +; NONEON-NOSVE-NEXT: strb w5, [sp, #92] +; NONEON-NOSVE-NEXT: strb w2, [sp, #91] +; NONEON-NOSVE-NEXT: strb w1, [sp, #90] +; NONEON-NOSVE-NEXT: strb w18, [sp, #89] +; NONEON-NOSVE-NEXT: strb w16, [sp, #88] +; NONEON-NOSVE-NEXT: strb w14, [sp, #87] +; NONEON-NOSVE-NEXT: strb w13, [sp, #86] +; NONEON-NOSVE-NEXT: strb w12, [sp, #85] +; NONEON-NOSVE-NEXT: strb w9, [sp, #82] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #80] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #208 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %mask = icmp eq <32 x i8> %op1, %op2 @@ -92,6 +521,29 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, <2 x i1> %mask) { ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #16] +; NONEON-NOSVE-NEXT: str d0, [sp] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] +; NONEON-NOSVE-NEXT: ldr w11, [sp, #4] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: csel w8, w11, w10, ne +; NONEON-NOSVE-NEXT: ldr w10, [sp] +; NONEON-NOSVE-NEXT: cmp w9, #0 +; NONEON-NOSVE-NEXT: str w8, [sp, 
#28] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: csel w8, w10, w8, ne +; NONEON-NOSVE-NEXT: str w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %sel = select <2 x i1> %mask, <2 x i16> %op1, <2 x i16> %op2 ret <2 x i16> %sel } @@ -110,6 +562,44 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) { ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #22] +; NONEON-NOSVE-NEXT: str d0, [sp] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #18] +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w11, w11, #0, #1 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: tst w9, #0xffff +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: csel w9, w13, w12, ne +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #4] +; NONEON-NOSVE-NEXT: tst w11, #0xffff +; NONEON-NOSVE-NEXT: strh w9, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #2] +; NONEON-NOSVE-NEXT: csel w9, w12, w9, ne +; NONEON-NOSVE-NEXT: tst w10, #0xffff +; NONEON-NOSVE-NEXT: ldrh w10, [sp] +; NONEON-NOSVE-NEXT: strh w9, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: csel w9, w11, w9, ne +; NONEON-NOSVE-NEXT: tst w8, #0xffff +; NONEON-NOSVE-NEXT: strh w9, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: csel w8, w10, w9, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; 
NONEON-NOSVE-NEXT: ret %sel = select <4 x i1> %mask, <4 x i16> %op1, <4 x i16> %op2 ret <4 x i16> %sel } @@ -129,6 +619,72 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) { ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: str d2, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #47] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #45] +; NONEON-NOSVE-NEXT: sbfx w13, w13, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w15, w15, #0, #1 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #44] +; NONEON-NOSVE-NEXT: sbfx w14, w14, #0, #1 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #42] +; NONEON-NOSVE-NEXT: tst w13, #0xffff +; NONEON-NOSVE-NEXT: sbfx w12, w12, #0, #1 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41] +; NONEON-NOSVE-NEXT: csel w13, w17, w16, ne +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #12] +; NONEON-NOSVE-NEXT: tst w15, #0xffff +; NONEON-NOSVE-NEXT: strh w13, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #10] +; NONEON-NOSVE-NEXT: sbfx w11, w11, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: csel w13, w16, w13, ne +; NONEON-NOSVE-NEXT: tst w14, #0xffff +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #8] +; NONEON-NOSVE-NEXT: strh w13, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #40] +; NONEON-NOSVE-NEXT: csel w13, w15, w13, ne +; NONEON-NOSVE-NEXT: tst w12, #0xffff +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: strh w13, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #24] +; 
NONEON-NOSVE-NEXT: csel w12, w14, w13, ne +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #6] +; NONEON-NOSVE-NEXT: tst w11, #0xffff +; NONEON-NOSVE-NEXT: strh w12, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #22] +; NONEON-NOSVE-NEXT: csel w11, w13, w12, ne +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #4] +; NONEON-NOSVE-NEXT: tst w10, #0xffff +; NONEON-NOSVE-NEXT: strh w11, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #20] +; NONEON-NOSVE-NEXT: csel w10, w12, w11, ne +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #2] +; NONEON-NOSVE-NEXT: tst w9, #0xffff +; NONEON-NOSVE-NEXT: strh w10, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #18] +; NONEON-NOSVE-NEXT: csel w9, w11, w10, ne +; NONEON-NOSVE-NEXT: ldrh w10, [sp] +; NONEON-NOSVE-NEXT: tst w8, #0xffff +; NONEON-NOSVE-NEXT: strh w9, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #16] +; NONEON-NOSVE-NEXT: csel w8, w10, w9, ne +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %sel = select <8 x i1> %mask, <8 x i16> %op1, <8 x i16> %op2 ret <8 x i16> %sel } @@ -145,6 +701,102 @@ define void @select_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: sel z1.h, p0, z2.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #112 +; NONEON-NOSVE-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 112 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -16 +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #6] +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #24] +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #8] +; 
NONEON-NOSVE-NEXT: csel w8, w9, w8, eq +; NONEON-NOSVE-NEXT: cmp w13, w12 +; NONEON-NOSVE-NEXT: ldrh w1, [sp, #12] +; NONEON-NOSVE-NEXT: csel w9, w13, w12, eq +; NONEON-NOSVE-NEXT: cmp w15, w14 +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #10] +; NONEON-NOSVE-NEXT: csel w14, w15, w14, eq +; NONEON-NOSVE-NEXT: cmp w17, w16 +; NONEON-NOSVE-NEXT: csel w16, w17, w16, eq +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #28] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp, #32] +; NONEON-NOSVE-NEXT: cmp w13, w12 +; NONEON-NOSVE-NEXT: ldrh w3, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w4, [sp, #14] +; NONEON-NOSVE-NEXT: csel w12, w13, w12, eq +; NONEON-NOSVE-NEXT: cmp w1, w17 +; NONEON-NOSVE-NEXT: ldrh w5, [sp, #32] +; NONEON-NOSVE-NEXT: csel w17, w1, w17, eq +; NONEON-NOSVE-NEXT: ldrh w1, [sp, #48] +; NONEON-NOSVE-NEXT: cmp w4, w3 +; NONEON-NOSVE-NEXT: ldrh w6, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w7, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w2, [sp, #52] +; NONEON-NOSVE-NEXT: csel w3, w4, w3, eq +; NONEON-NOSVE-NEXT: ldrh w4, [sp, #36] +; NONEON-NOSVE-NEXT: cmp w5, w1 +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w19, [sp, #38] +; NONEON-NOSVE-NEXT: csel w1, w5, w1, eq +; NONEON-NOSVE-NEXT: cmp w7, w6 +; NONEON-NOSVE-NEXT: ldrh w18, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w5, [sp, #40] +; NONEON-NOSVE-NEXT: csel w6, w7, w6, eq +; NONEON-NOSVE-NEXT: cmp w4, w2 +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w7, [sp, #42] +; NONEON-NOSVE-NEXT: csel w2, w4, w2, eq +; NONEON-NOSVE-NEXT: cmp w19, w13 +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w4, [sp, #44] +; NONEON-NOSVE-NEXT: csel w13, w19, w13, eq +; NONEON-NOSVE-NEXT: cmp w5, w18 +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w19, [sp, #46] +; NONEON-NOSVE-NEXT: csel w18, w5, w18, eq +; NONEON-NOSVE-NEXT: cmp w7, w15 +; NONEON-NOSVE-NEXT: ldrh w5, [sp] +; NONEON-NOSVE-NEXT: csel w15, w7, w15, eq +; NONEON-NOSVE-NEXT: cmp w4, w11 
+; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: csel w11, w4, w11, eq +; NONEON-NOSVE-NEXT: ldrh w4, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w19, w10 +; NONEON-NOSVE-NEXT: csel w10, w19, w10, eq +; NONEON-NOSVE-NEXT: strh w11, [sp, #92] +; NONEON-NOSVE-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: cmp w5, w4 +; NONEON-NOSVE-NEXT: strh w10, [sp, #94] +; NONEON-NOSVE-NEXT: csel w8, w5, w4, eq +; NONEON-NOSVE-NEXT: strh w15, [sp, #90] +; NONEON-NOSVE-NEXT: strh w18, [sp, #88] +; NONEON-NOSVE-NEXT: strh w13, [sp, #86] +; NONEON-NOSVE-NEXT: strh w2, [sp, #84] +; NONEON-NOSVE-NEXT: strh w6, [sp, #82] +; NONEON-NOSVE-NEXT: strh w1, [sp, #80] +; NONEON-NOSVE-NEXT: strh w3, [sp, #78] +; NONEON-NOSVE-NEXT: strh w17, [sp, #76] +; NONEON-NOSVE-NEXT: strh w12, [sp, #74] +; NONEON-NOSVE-NEXT: strh w16, [sp, #72] +; NONEON-NOSVE-NEXT: strh w14, [sp, #70] +; NONEON-NOSVE-NEXT: strh w9, [sp, #68] +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #112 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %mask = icmp eq <16 x i16> %op1, %op2 @@ -167,6 +819,29 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) { ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #16] +; NONEON-NOSVE-NEXT: str d0, [sp] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] +; NONEON-NOSVE-NEXT: ldr w11, [sp, #4] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: csel w8, w11, w10, ne +; NONEON-NOSVE-NEXT: ldr w10, [sp] +; 
NONEON-NOSVE-NEXT: cmp w9, #0 +; NONEON-NOSVE-NEXT: str w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: csel w8, w10, w8, ne +; NONEON-NOSVE-NEXT: str w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %sel = select <2 x i1> %mask, <2 x i32> %op1, <2 x i32> %op2 ret <2 x i32> %sel } @@ -186,6 +861,44 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) { ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: str d2, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #46] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w12, [sp, #28] +; NONEON-NOSVE-NEXT: ldr w13, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #42] +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w11, w11, #0, #1 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #40] +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: cmp w9, #0 +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: csel w9, w13, w12, ne +; NONEON-NOSVE-NEXT: ldr w12, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w11, #0 +; NONEON-NOSVE-NEXT: str w9, [sp, #60] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w11, [sp, #4] +; NONEON-NOSVE-NEXT: csel w9, w12, w9, ne +; NONEON-NOSVE-NEXT: cmp w10, #0 +; NONEON-NOSVE-NEXT: ldr w10, [sp] +; NONEON-NOSVE-NEXT: str w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #20] +; NONEON-NOSVE-NEXT: csel w9, w11, w9, ne +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: str w9, [sp, #52] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] +; NONEON-NOSVE-NEXT: csel w8, w10, w9, ne +; NONEON-NOSVE-NEXT: str w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; 
NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %sel = select <4 x i1> %mask, <4 x i32> %op1, <4 x i32> %op2 ret <4 x i32> %sel } @@ -202,6 +915,47 @@ define void @select_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: sel z1.s, p0, z2.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w10, w13, [sp, #4] +; NONEON-NOSVE-NEXT: ldp w12, w11, [sp, #24] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w14, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w10, w9 +; NONEON-NOSVE-NEXT: csel w9, w10, w9, eq +; NONEON-NOSVE-NEXT: cmp w13, w12 +; NONEON-NOSVE-NEXT: ldp w15, w16, [sp, #48] +; NONEON-NOSVE-NEXT: csel w12, w13, w12, eq +; NONEON-NOSVE-NEXT: cmp w14, w11 +; NONEON-NOSVE-NEXT: ldp w10, w13, [sp, #32] +; NONEON-NOSVE-NEXT: csel w11, w14, w11, eq +; NONEON-NOSVE-NEXT: ldp w17, w14, [sp, #56] +; NONEON-NOSVE-NEXT: ldp w18, w1, [sp, #40] +; NONEON-NOSVE-NEXT: cmp w10, w15 +; NONEON-NOSVE-NEXT: stp w12, w11, [sp, #72] +; NONEON-NOSVE-NEXT: csel w10, w10, w15, eq +; NONEON-NOSVE-NEXT: cmp w13, w16 +; NONEON-NOSVE-NEXT: ldr w15, [sp] +; NONEON-NOSVE-NEXT: csel w13, w13, w16, eq +; NONEON-NOSVE-NEXT: cmp w18, w17 +; NONEON-NOSVE-NEXT: csel w16, w18, w17, eq +; NONEON-NOSVE-NEXT: cmp w1, w14 +; NONEON-NOSVE-NEXT: stp w10, w13, [sp, #80] +; NONEON-NOSVE-NEXT: csel w10, w1, w14, eq +; NONEON-NOSVE-NEXT: cmp w15, w8 +; NONEON-NOSVE-NEXT: csel w8, w15, w8, eq +; NONEON-NOSVE-NEXT: stp w16, w10, [sp, #88] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %mask = 
icmp eq <8 x i32> %op1, %op2 @@ -223,6 +977,19 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) { ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, ne +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %sel = select <1 x i1> %mask, <1 x i64> %op1, <1 x i64> %op2 ret <1 x i64> %sel } @@ -242,6 +1009,29 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) { ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: str d2, [sp, #40] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #40] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp] +; NONEON-NOSVE-NEXT: ldr x10, [sp, #24] +; NONEON-NOSVE-NEXT: ldr x11, [sp, #8] +; NONEON-NOSVE-NEXT: sbfx x8, x8, #0, #1 +; NONEON-NOSVE-NEXT: sbfx x9, x9, #0, #1 +; NONEON-NOSVE-NEXT: cmp x8, #0 +; NONEON-NOSVE-NEXT: csel x8, x11, x10, ne +; NONEON-NOSVE-NEXT: ldr x10, [sp] +; NONEON-NOSVE-NEXT: cmp x9, #0 +; NONEON-NOSVE-NEXT: str x8, [sp, #56] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: csel x8, x10, x8, ne +; NONEON-NOSVE-NEXT: str x8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %sel = select <2 x i1> %mask, <2 x i64> %op1, <2 x i64> %op2 ret <2 x i64> %sel } @@ -258,6 +1048,34 @@ define void @select_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: sel z1.d, p0, 
z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-96]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: stp q2, q3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #8] +; NONEON-NOSVE-NEXT: ldp x8, x11, [sp, #24] +; NONEON-NOSVE-NEXT: ldr x13, [sp, #40] +; NONEON-NOSVE-NEXT: ldp x10, x12, [sp, #48] +; NONEON-NOSVE-NEXT: cmp x9, x8 +; NONEON-NOSVE-NEXT: csel x8, x9, x8, eq +; NONEON-NOSVE-NEXT: cmp x11, x10 +; NONEON-NOSVE-NEXT: csel x9, x11, x10, eq +; NONEON-NOSVE-NEXT: ldr x10, [sp, #16] +; NONEON-NOSVE-NEXT: ldr x11, [sp] +; NONEON-NOSVE-NEXT: cmp x13, x12 +; NONEON-NOSVE-NEXT: csel x12, x13, x12, eq +; NONEON-NOSVE-NEXT: cmp x11, x10 +; NONEON-NOSVE-NEXT: stp x9, x12, [sp, #80] +; NONEON-NOSVE-NEXT: csel x9, x11, x10, eq +; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %mask = icmp eq <4 x i64> %op1, %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll index 0b6152340f65a..66d544d0acbf5 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll @@ -33,19 +33,23 @@ define void @alloc_v4i8(ptr %st_ptr) nounwind { ; ; NONEON-NOSVE-LABEL: alloc_v4i8: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #32 -; NONEON-NOSVE-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: mov x19, x0 -; NONEON-NOSVE-NEXT: add x0, 
sp, #12 +; NONEON-NOSVE-NEXT: add x0, sp, #28 ; NONEON-NOSVE-NEXT: bl def -; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] -; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0 -; NONEON-NOSVE-NEXT: umov w8, v0.h[2] -; NONEON-NOSVE-NEXT: umov w9, v0.h[0] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: strh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: str d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #16] ; NONEON-NOSVE-NEXT: strb w8, [x19, #1] ; NONEON-NOSVE-NEXT: strb w9, [x19] -; NONEON-NOSVE-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: add sp, sp, #48 ; NONEON-NOSVE-NEXT: ret %alloc = alloca [4 x i8] call void @def(ptr %alloc) @@ -88,21 +92,25 @@ define void @alloc_v6i8(ptr %st_ptr) nounwind { ; ; NONEON-NOSVE-LABEL: alloc_v6i8: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #32 -; NONEON-NOSVE-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: mov x19, x0 -; NONEON-NOSVE-NEXT: add x0, sp, #8 +; NONEON-NOSVE-NEXT: add x0, sp, #24 ; NONEON-NOSVE-NEXT: bl def -; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] -; NONEON-NOSVE-NEXT: add x9, x19, #2 -; NONEON-NOSVE-NEXT: rev16 v1.16b, v0.16b -; NONEON-NOSVE-NEXT: xtn v1.8b, v1.8h -; NONEON-NOSVE-NEXT: str s1, [sp, #4] -; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] -; NONEON-NOSVE-NEXT: st1 { v0.b }[5], [x9] -; NONEON-NOSVE-NEXT: strh w8, [x19] -; NONEON-NOSVE-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: str x8, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; 
NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #20] +; NONEON-NOSVE-NEXT: strb w8, [x19, #2] +; NONEON-NOSVE-NEXT: strh w9, [x19] +; NONEON-NOSVE-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: add sp, sp, #48 ; NONEON-NOSVE-NEXT: ret %alloc = alloca [6 x i8] call void @def(ptr %alloc) @@ -135,18 +143,38 @@ define void @alloc_v32i8(ptr %st_ptr) nounwind { ; ; NONEON-NOSVE-LABEL: alloc_v32i8: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #48 -; NONEON-NOSVE-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: sub sp, sp, #112 +; NONEON-NOSVE-NEXT: stp x30, x19, [sp, #96] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: mov x19, x0 -; NONEON-NOSVE-NEXT: mov x0, sp +; NONEON-NOSVE-NEXT: add x0, sp, #64 ; NONEON-NOSVE-NEXT: bl def -; NONEON-NOSVE-NEXT: ldp q0, q1, [sp] -; NONEON-NOSVE-NEXT: add x8, x19, #8 -; NONEON-NOSVE-NEXT: xtn v0.8b, v0.8h -; NONEON-NOSVE-NEXT: st1 { v1.b }[0], [x8] -; NONEON-NOSVE-NEXT: str d0, [x19] -; NONEON-NOSVE-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: str q1, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; 
NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [x19, #8] +; NONEON-NOSVE-NEXT: str q0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: str x8, [x19] +; NONEON-NOSVE-NEXT: ldp x30, x19, [sp, #96] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: add sp, sp, #112 ; NONEON-NOSVE-NEXT: ret %alloc = alloca [32 x i8] call void @def(ptr %alloc) @@ -179,18 +207,26 @@ define void @alloc_v8f64(ptr %st_ptr) nounwind { ; ; NONEON-NOSVE-LABEL: alloc_v8f64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: sub sp, sp, #80 -; NONEON-NOSVE-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: sub sp, sp, #176 +; NONEON-NOSVE-NEXT: stp x30, x19, [sp, #160] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: mov x19, x0 -; NONEON-NOSVE-NEXT: mov x0, sp +; NONEON-NOSVE-NEXT: add x0, sp, #96 ; NONEON-NOSVE-NEXT: bl def -; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #32] -; NONEON-NOSVE-NEXT: ldp q3, q2, [sp] -; NONEON-NOSVE-NEXT: zip1 v0.2d, v1.2d, v0.2d -; NONEON-NOSVE-NEXT: zip1 v1.2d, v3.2d, v2.2d +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #96] +; NONEON-NOSVE-NEXT: ldp q2, q3, [sp, #128] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr d1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #48] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] +; NONEON-NOSVE-NEXT: ldr d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr d0, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #80] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] ; NONEON-NOSVE-NEXT: stp q1, q0, [x19] -; NONEON-NOSVE-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload -; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ldp x30, x19, [sp, #160] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: add sp, sp, #176 ; NONEON-NOSVE-NEXT: ret %alloc = alloca [8 x double] call void @def(ptr %alloc) 
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll index 918f0ccc0cf6a..3b83f982b6bfc 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -18,6 +19,72 @@ define <4 x i32> @test(ptr %arg1, ptr %arg2) { ; CHECK-NEXT: stp q2, q5, [x0, #32] ; CHECK-NEXT: stp q1, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: sub sp, sp, #144 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 144 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: str q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #60] +; NONEON-NOSVE-NEXT: str q2, [sp, #16] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: stp w8, w8, [sp, #8] +; NONEON-NOSVE-NEXT: str w9, [sp, #124] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #56] +; NONEON-NOSVE-NEXT: stp w8, w8, [sp] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: ldr q0, [sp] +; NONEON-NOSVE-NEXT: str w9, [sp, #120] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #52] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: str w9, [sp, #116] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #48] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: str w9, [sp, #112] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #28] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; 
NONEON-NOSVE-NEXT: str w9, [sp, #92] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #24] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: str w9, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #20] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: str w9, [sp, #84] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: str w9, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #44] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #100] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #96] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldp q4, q2, [sp, #80] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #140] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #136] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #132] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q3, q1, [sp, #112] +; NONEON-NOSVE-NEXT: stp q4, q3, [x0] +; NONEON-NOSVE-NEXT: stp q1, q2, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #144 +; NONEON-NOSVE-NEXT: ret entry: %0 = load <16 x i32>, ptr %arg1, align 256 %1 = load <16 x i32>, ptr %arg2, align 256 @@ -42,6 +109,75 @@ define <2 x i32> @test2(ptr %arg1, ptr %arg2) { ; CHECK-NEXT: stp q3, q4, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test2: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: sub sp, sp, #144 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 144 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: str q1, [sp, #64] +; 
NONEON-NOSVE-NEXT: stp q0, q3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: str q2, [sp, #16] +; NONEON-NOSVE-NEXT: str d0, [sp] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: stp w8, w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #124] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #120] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #116] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #112] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #108] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #104] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #100] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #96] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldp q4, q2, [sp, #80] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #140] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #136] +; NONEON-NOSVE-NEXT: ldr w8, [sp, 
#68] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #132] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q3, q1, [sp, #112] +; NONEON-NOSVE-NEXT: stp q4, q3, [x0] +; NONEON-NOSVE-NEXT: stp q1, q2, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #144 +; NONEON-NOSVE-NEXT: ret entry: %0 = load <16 x i32>, ptr %arg1, align 256 %1 = load <16 x i32>, ptr %arg2, align 256 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll index 8c69d5b0bb375..c97a3c2e721a3 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -11,6 +12,22 @@ define <4 x i8> @load_v4i8(ptr %a) { ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrb w8, [x0, #3] +; NONEON-NOSVE-NEXT: strh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w8, [x0, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w8, [x0, #1] +; NONEON-NOSVE-NEXT: strh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w8, [x0] +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %load = load <4 x i8>, ptr %a ret <4 x i8> %load } @@ -20,6 +37,11 @@ 
define <8 x i8> @load_v8i8(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <8 x i8>, ptr %a ret <8 x i8> %load } @@ -29,6 +51,11 @@ define <16 x i8> @load_v16i8(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <16 x i8>, ptr %a ret <16 x i8> %load } @@ -38,6 +65,11 @@ define <32 x i8> @load_v32i8(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <32 x i8>, ptr %a ret <32 x i8> %load } @@ -49,6 +81,18 @@ define <2 x i16> @load_v2i16(ptr %a) { ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [x0, #2] +; NONEON-NOSVE-NEXT: str w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w8, [x0] +; NONEON-NOSVE-NEXT: str w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %load = load <2 x i16>, ptr %a ret <2 x i16> %load } @@ -58,6 +102,16 @@ define <2 x half> @load_v2f16(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: str w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %load = load <2 x half>, ptr %a ret 
<2 x half> %load } @@ -67,6 +121,11 @@ define <4 x i16> @load_v4i16(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <4 x i16>, ptr %a ret <4 x i16> %load } @@ -76,6 +135,11 @@ define <4 x half> @load_v4f16(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <4 x half>, ptr %a ret <4 x half> %load } @@ -85,6 +149,11 @@ define <8 x i16> @load_v8i16(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <8 x i16>, ptr %a ret <8 x i16> %load } @@ -94,6 +163,11 @@ define <8 x half> @load_v8f16(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <8 x half>, ptr %a ret <8 x half> %load } @@ -103,6 +177,11 @@ define <16 x i16> @load_v16i16(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <16 x i16>, ptr %a ret <16 x i16> %load } @@ -112,6 +191,11 @@ define <16 x half> @load_v16f16(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <16 x half>, ptr %a ret <16 x half> %load } @@ -121,6 +205,11 @@ define <2 x i32> @load_v2i32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ret +; +; 
NONEON-NOSVE-LABEL: load_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <2 x i32>, ptr %a ret <2 x i32> %load } @@ -130,6 +219,11 @@ define <2 x float> @load_v2f32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <2 x float>, ptr %a ret <2 x float> %load } @@ -139,6 +233,11 @@ define <4 x i32> @load_v4i32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <4 x i32>, ptr %a ret <4 x i32> %load } @@ -148,6 +247,11 @@ define <4 x float> @load_v4f32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <4 x float>, ptr %a ret <4 x float> %load } @@ -157,6 +261,11 @@ define <8 x i32> @load_v8i32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <8 x i32>, ptr %a ret <8 x i32> %load } @@ -166,6 +275,11 @@ define <8 x float> @load_v8f32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <8 x float>, ptr %a ret <8 x float> %load } @@ -175,6 +289,11 @@ define <1 x i64> @load_v1i64(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <1 x i64>, ptr 
%a ret <1 x i64> %load } @@ -184,6 +303,11 @@ define <1 x double> @load_v1f64(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <1 x double>, ptr %a ret <1 x double> %load } @@ -193,6 +317,11 @@ define <2 x i64> @load_v2i64(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <2 x i64>, ptr %a ret <2 x i64> %load } @@ -202,6 +331,11 @@ define <2 x double> @load_v2f64(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <2 x double>, ptr %a ret <2 x double> %load } @@ -211,6 +345,11 @@ define <4 x i64> @load_v4i64(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <4 x i64>, ptr %a ret <4 x i64> %load } @@ -220,6 +359,11 @@ define <4 x double> @load_v4f64(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <4 x double>, ptr %a ret <4 x double> %load } diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll index ef52eadc5d3b0..9e1edb817c459 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated 
by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -17,6 +18,21 @@ define i8 @andv_v4i8(<4 x i8> %a) { ; CHECK-NEXT: andv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #8] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: and w10, w11, w10 +; NONEON-NOSVE-NEXT: and w0, w10, w8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %a) ret i8 %res } @@ -29,6 +45,29 @@ define i8 @andv_v8i8(<8 x i8> %a) { ; CHECK-NEXT: andv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #9] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #14] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: and w12, w13, w12 +; NONEON-NOSVE-NEXT: and w10, w11, w10 +; NONEON-NOSVE-NEXT: and w10, w12, w10 +; NONEON-NOSVE-NEXT: and w8, w8, w14 +; NONEON-NOSVE-NEXT: and w8, w10, w8 +; NONEON-NOSVE-NEXT: and w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; 
NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a) ret i8 %res } @@ -41,6 +80,44 @@ define i8 @andv_v16i8(<16 x i8> %a) { ; CHECK-NEXT: andv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1] +; NONEON-NOSVE-NEXT: ldrb w11, [sp] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #4] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #3] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #2] +; NONEON-NOSVE-NEXT: and w10, w11, w10 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #8] +; NONEON-NOSVE-NEXT: and w11, w14, w13 +; NONEON-NOSVE-NEXT: and w9, w12, w9 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #6] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #7] +; NONEON-NOSVE-NEXT: and w10, w10, w11 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #13] +; NONEON-NOSVE-NEXT: and w9, w9, w16 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #9] +; NONEON-NOSVE-NEXT: and w12, w12, w15 +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #14] +; NONEON-NOSVE-NEXT: and w8, w13, w8 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #15] +; NONEON-NOSVE-NEXT: and w12, w12, w14 +; NONEON-NOSVE-NEXT: and w8, w8, w11 +; NONEON-NOSVE-NEXT: and w9, w10, w9 +; NONEON-NOSVE-NEXT: and w10, w12, w16 +; NONEON-NOSVE-NEXT: and w8, w8, w15 +; NONEON-NOSVE-NEXT: and w9, w9, w10 +; NONEON-NOSVE-NEXT: and w8, w8, w13 +; NONEON-NOSVE-NEXT: and w0, w9, w8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a) ret i8 %res } @@ -54,6 +131,77 @@ define i8 @andv_v32i8(ptr %a) { ; CHECK-NEXT: andv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; 
NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w11, [sp] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #2] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #3] +; NONEON-NOSVE-NEXT: and w9, w11, w10 +; NONEON-NOSVE-NEXT: and w10, w13, w12 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #5] +; NONEON-NOSVE-NEXT: and w11, w15, w14 +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #4] +; NONEON-NOSVE-NEXT: and w9, w10, w11 +; NONEON-NOSVE-NEXT: and w10, w13, w12 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #6] +; NONEON-NOSVE-NEXT: and w14, w17, w16 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #8] +; NONEON-NOSVE-NEXT: and w10, w14, w10 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #7] +; NONEON-NOSVE-NEXT: and w11, w12, w11 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #24] +; NONEON-NOSVE-NEXT: and w8, w8, w9 +; NONEON-NOSVE-NEXT: and w9, w10, w11 +; NONEON-NOSVE-NEXT: and w10, w14, w13 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #9] +; NONEON-NOSVE-NEXT: and w8, w8, w9 +; NONEON-NOSVE-NEXT: and w11, w15, w12 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #26] +; NONEON-NOSVE-NEXT: and w9, w10, w11 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #10] +; NONEON-NOSVE-NEXT: and w10, w13, w12 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #12] +; NONEON-NOSVE-NEXT: and w9, w9, w10 +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #14] +; NONEON-NOSVE-NEXT: and w11, w12, w11 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #29] +; 
NONEON-NOSVE-NEXT: ldrb w17, [sp, #15] +; NONEON-NOSVE-NEXT: and w10, w13, w10 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #13] +; NONEON-NOSVE-NEXT: and w14, w15, w14 +; NONEON-NOSVE-NEXT: and w10, w11, w10 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #30] +; NONEON-NOSVE-NEXT: and w9, w9, w14 +; NONEON-NOSVE-NEXT: and w12, w13, w12 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #31] +; NONEON-NOSVE-NEXT: and w8, w8, w9 +; NONEON-NOSVE-NEXT: and w10, w10, w12 +; NONEON-NOSVE-NEXT: and w11, w16, w11 +; NONEON-NOSVE-NEXT: and w10, w10, w11 +; NONEON-NOSVE-NEXT: and w11, w17, w13 +; NONEON-NOSVE-NEXT: and w9, w10, w11 +; NONEON-NOSVE-NEXT: and w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %op) ret i8 %res @@ -67,6 +215,16 @@ define i16 @andv_v2i16(<2 x i16> %a) { ; CHECK-NEXT: andv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: and w0, w9, w8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> %a) ret i16 %res } @@ -79,6 +237,21 @@ define i16 @andv_v4i16(<4 x i16> %a) { ; CHECK-NEXT: andv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #8] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: and w10, w11, w10 +; NONEON-NOSVE-NEXT: and w0, w10, w8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; 
NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %a) ret i16 %res } @@ -91,6 +264,28 @@ define i16 @andv_v8i16(<8 x i16> %a) { ; CHECK-NEXT: andv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w13, [sp] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: and w12, w13, w12 +; NONEON-NOSVE-NEXT: and w10, w11, w10 +; NONEON-NOSVE-NEXT: and w10, w12, w10 +; NONEON-NOSVE-NEXT: and w8, w8, w14 +; NONEON-NOSVE-NEXT: and w8, w10, w8 +; NONEON-NOSVE-NEXT: and w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %a) ret i16 %res } @@ -104,6 +299,45 @@ define i16 @andv_v16i16(ptr %a) { ; CHECK-NEXT: andv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w11, [sp] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #4] +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #6] +; NONEON-NOSVE-NEXT: and w9, w11, w10 +; NONEON-NOSVE-NEXT: and w12, w13, w12 +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #10] +; NONEON-NOSVE-NEXT: and w13, w15, w14 +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #24] +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #8] +; NONEON-NOSVE-NEXT: and w9, w12, w13 +; NONEON-NOSVE-NEXT: and w10, w11, w10 +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #12] +; NONEON-NOSVE-NEXT: and w14, w17, w16 +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #14] +; NONEON-NOSVE-NEXT: and w10, w14, w10 +; NONEON-NOSVE-NEXT: and w11, w15, w11 +; NONEON-NOSVE-NEXT: and w8, w8, w9 +; NONEON-NOSVE-NEXT: and w9, w10, w11 +; NONEON-NOSVE-NEXT: and w8, w8, w9 +; NONEON-NOSVE-NEXT: and w9, w13, w12 +; NONEON-NOSVE-NEXT: and w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %op) ret i16 %res @@ -117,6 +351,16 @@ define i32 @andv_v2i32(<2 x i32> %a) { ; CHECK-NEXT: andv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: and w0, w9, w8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %a) ret i32 %res } 
@@ -129,6 +373,17 @@ define i32 @andv_v4i32(<4 x i32> %a) { ; CHECK-NEXT: andv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w11, w10, [sp], #16 +; NONEON-NOSVE-NEXT: and w10, w11, w10 +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: and w0, w10, w8 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a) ret i32 %res } @@ -142,6 +397,25 @@ define i32 @andv_v8i32(ptr %a) { ; CHECK-NEXT: andv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w11, w10, [sp] +; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #24] +; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #8] +; NONEON-NOSVE-NEXT: and w8, w10, w8 +; NONEON-NOSVE-NEXT: and w9, w11, w9 +; NONEON-NOSVE-NEXT: and w8, w9, w8 +; NONEON-NOSVE-NEXT: and w10, w14, w12 +; NONEON-NOSVE-NEXT: and w11, w15, w13 +; NONEON-NOSVE-NEXT: and w9, w10, w11 +; NONEON-NOSVE-NEXT: and w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %op) ret i32 %res @@ -155,6 +429,14 @@ define i64 @andv_v2i64(<2 x i64> %a) { ; CHECK-NEXT: andv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp], #16 +; NONEON-NOSVE-NEXT: and x0, x9, x8 +; NONEON-NOSVE-NEXT: ret %res = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %a) ret i64 %res } @@ -168,6 +450,18 @@ define i64 @andv_v4i64(ptr %a) { ; CHECK-NEXT: andv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp, #16] +; NONEON-NOSVE-NEXT: ldp x11, x10, [sp], #32 +; NONEON-NOSVE-NEXT: and x8, x10, x8 +; NONEON-NOSVE-NEXT: and x9, x11, x9 +; NONEON-NOSVE-NEXT: and x0, x9, x8 +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %op) ret i64 %res @@ -185,6 +479,21 @@ define i8 @eorv_v4i8(<4 x i8> %a) { ; CHECK-NEXT: eorv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #8] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: eor w10, w11, w10 +; NONEON-NOSVE-NEXT: eor w0, w10, w8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> %a) ret i8 %res } @@ -197,6 +506,29 @@ define i8 @eorv_v8i8(<8 x i8> %a) { ; CHECK-NEXT: eorv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: 
ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #9] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #14] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: eor w12, w13, w12 +; NONEON-NOSVE-NEXT: eor w10, w11, w10 +; NONEON-NOSVE-NEXT: eor w10, w12, w10 +; NONEON-NOSVE-NEXT: eor w8, w8, w14 +; NONEON-NOSVE-NEXT: eor w8, w10, w8 +; NONEON-NOSVE-NEXT: eor w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %a) ret i8 %res } @@ -209,6 +541,44 @@ define i8 @eorv_v16i8(<16 x i8> %a) { ; CHECK-NEXT: eorv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1] +; NONEON-NOSVE-NEXT: ldrb w11, [sp] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #4] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #3] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #2] +; NONEON-NOSVE-NEXT: eor w10, w11, w10 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #8] +; NONEON-NOSVE-NEXT: eor w11, w14, w13 +; NONEON-NOSVE-NEXT: eor w9, w12, w9 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #6] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #7] +; NONEON-NOSVE-NEXT: eor w10, w10, w11 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #13] +; NONEON-NOSVE-NEXT: eor w9, w9, w16 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #9] +; NONEON-NOSVE-NEXT: eor w12, w12, w15 +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #14] +; NONEON-NOSVE-NEXT: eor w8, w13, w8 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #15] +; NONEON-NOSVE-NEXT: eor w12, w12, w14 +; NONEON-NOSVE-NEXT: eor w8, w8, w11 +; NONEON-NOSVE-NEXT: eor w9, 
w10, w9 +; NONEON-NOSVE-NEXT: eor w10, w12, w16 +; NONEON-NOSVE-NEXT: eor w8, w8, w15 +; NONEON-NOSVE-NEXT: eor w9, w9, w10 +; NONEON-NOSVE-NEXT: eor w8, w8, w13 +; NONEON-NOSVE-NEXT: eor w0, w9, w8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %a) ret i8 %res } @@ -222,6 +592,77 @@ define i8 @eorv_v32i8(ptr %a) { ; CHECK-NEXT: eorv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w11, [sp] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #2] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #3] +; NONEON-NOSVE-NEXT: eor w9, w11, w10 +; NONEON-NOSVE-NEXT: eor w10, w13, w12 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #5] +; NONEON-NOSVE-NEXT: eor w11, w15, w14 +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #4] +; NONEON-NOSVE-NEXT: eor w9, w10, w11 +; NONEON-NOSVE-NEXT: eor w10, w13, w12 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #6] +; NONEON-NOSVE-NEXT: eor w14, w17, w16 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #8] +; NONEON-NOSVE-NEXT: eor w10, w14, w10 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #7] +; NONEON-NOSVE-NEXT: eor w11, w12, w11 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #24] +; NONEON-NOSVE-NEXT: eor w8, w8, w9 +; NONEON-NOSVE-NEXT: eor w9, w10, w11 +; NONEON-NOSVE-NEXT: eor w10, w14, w13 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #9] +; NONEON-NOSVE-NEXT: eor w8, w8, w9 +; 
NONEON-NOSVE-NEXT: eor w11, w15, w12 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #26] +; NONEON-NOSVE-NEXT: eor w9, w10, w11 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #10] +; NONEON-NOSVE-NEXT: eor w10, w13, w12 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #12] +; NONEON-NOSVE-NEXT: eor w9, w9, w10 +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #14] +; NONEON-NOSVE-NEXT: eor w11, w12, w11 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #15] +; NONEON-NOSVE-NEXT: eor w10, w13, w10 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #13] +; NONEON-NOSVE-NEXT: eor w14, w15, w14 +; NONEON-NOSVE-NEXT: eor w10, w11, w10 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #30] +; NONEON-NOSVE-NEXT: eor w9, w9, w14 +; NONEON-NOSVE-NEXT: eor w12, w13, w12 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #31] +; NONEON-NOSVE-NEXT: eor w8, w8, w9 +; NONEON-NOSVE-NEXT: eor w10, w10, w12 +; NONEON-NOSVE-NEXT: eor w11, w16, w11 +; NONEON-NOSVE-NEXT: eor w10, w10, w11 +; NONEON-NOSVE-NEXT: eor w11, w17, w13 +; NONEON-NOSVE-NEXT: eor w9, w10, w11 +; NONEON-NOSVE-NEXT: eor w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %op) ret i8 %res @@ -235,6 +676,16 @@ define i16 @eorv_v2i16(<2 x i16> %a) { ; CHECK-NEXT: eorv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: eor w0, w9, w8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> %a) ret i16 %res } @@ -247,6 +698,21 @@ define i16 @eorv_v4i16(<4 x i16> %a) { ; CHECK-NEXT: eorv 
h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #8] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: eor w10, w11, w10 +; NONEON-NOSVE-NEXT: eor w0, w10, w8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %a) ret i16 %res } @@ -259,6 +725,28 @@ define i16 @eorv_v8i16(<8 x i16> %a) { ; CHECK-NEXT: eorv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w13, [sp] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: eor w12, w13, w12 +; NONEON-NOSVE-NEXT: eor w10, w11, w10 +; NONEON-NOSVE-NEXT: eor w10, w12, w10 +; NONEON-NOSVE-NEXT: eor w8, w8, w14 +; NONEON-NOSVE-NEXT: eor w8, w10, w8 +; NONEON-NOSVE-NEXT: eor w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %a) ret i16 %res } @@ -272,6 +760,45 @@ define i16 @eorv_v16i16(ptr %a) { ; CHECK-NEXT: eorv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w11, [sp] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #4] +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #6] +; NONEON-NOSVE-NEXT: eor w9, w11, w10 +; NONEON-NOSVE-NEXT: eor w12, w13, w12 +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #10] +; NONEON-NOSVE-NEXT: eor w13, w15, w14 +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #24] +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #8] +; NONEON-NOSVE-NEXT: eor w9, w12, w13 +; NONEON-NOSVE-NEXT: eor w10, w11, w10 +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #12] +; NONEON-NOSVE-NEXT: eor w14, w17, w16 +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #14] +; NONEON-NOSVE-NEXT: eor w10, w14, w10 +; NONEON-NOSVE-NEXT: eor w11, w15, w11 +; NONEON-NOSVE-NEXT: eor w8, w8, w9 +; NONEON-NOSVE-NEXT: eor w9, w10, w11 +; NONEON-NOSVE-NEXT: eor w8, w8, w9 +; NONEON-NOSVE-NEXT: eor w9, w13, w12 +; NONEON-NOSVE-NEXT: eor w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %op) ret i16 %res @@ -285,6 +812,16 @@ define i32 @eorv_v2i32(<2 x i32> %a) { ; CHECK-NEXT: eorv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: eor w0, w9, w8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %a) ret i32 %res } 
@@ -297,6 +834,17 @@ define i32 @eorv_v4i32(<4 x i32> %a) { ; CHECK-NEXT: eorv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w11, w10, [sp], #16 +; NONEON-NOSVE-NEXT: eor w10, w11, w10 +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: eor w0, w10, w8 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a) ret i32 %res } @@ -310,6 +858,25 @@ define i32 @eorv_v8i32(ptr %a) { ; CHECK-NEXT: eorv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w11, w10, [sp] +; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #24] +; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #8] +; NONEON-NOSVE-NEXT: eor w8, w10, w8 +; NONEON-NOSVE-NEXT: eor w9, w11, w9 +; NONEON-NOSVE-NEXT: eor w8, w9, w8 +; NONEON-NOSVE-NEXT: eor w10, w14, w12 +; NONEON-NOSVE-NEXT: eor w11, w15, w13 +; NONEON-NOSVE-NEXT: eor w9, w10, w11 +; NONEON-NOSVE-NEXT: eor w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %op) ret i32 %res @@ -323,6 +890,14 @@ define i64 @eorv_v2i64(<2 x i64> %a) { ; CHECK-NEXT: eorv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp], #16 +; NONEON-NOSVE-NEXT: eor x0, x9, x8 +; NONEON-NOSVE-NEXT: ret %res = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %a) ret i64 %res } @@ -336,6 +911,18 @@ define i64 @eorv_v4i64(ptr %a) { ; CHECK-NEXT: eorv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp, #16] +; NONEON-NOSVE-NEXT: ldp x11, x10, [sp], #32 +; NONEON-NOSVE-NEXT: eor x8, x10, x8 +; NONEON-NOSVE-NEXT: eor x9, x11, x9 +; NONEON-NOSVE-NEXT: eor x0, x9, x8 +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %op) ret i64 %res @@ -353,6 +940,21 @@ define i8 @orv_v4i8(<4 x i8> %a) { ; CHECK-NEXT: orv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #8] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: orr w10, w11, w10 +; NONEON-NOSVE-NEXT: orr w0, w10, w8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> %a) ret i8 %res } @@ -365,6 +967,29 @@ define i8 @orv_v8i8(<8 x i8> %a) { ; CHECK-NEXT: orv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ldrb 
w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #9] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #14] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: orr w12, w13, w12 +; NONEON-NOSVE-NEXT: orr w10, w11, w10 +; NONEON-NOSVE-NEXT: orr w10, w12, w10 +; NONEON-NOSVE-NEXT: orr w8, w8, w14 +; NONEON-NOSVE-NEXT: orr w8, w10, w8 +; NONEON-NOSVE-NEXT: orr w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a) ret i8 %res } @@ -377,6 +1002,44 @@ define i8 @orv_v16i8(<16 x i8> %a) { ; CHECK-NEXT: orv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1] +; NONEON-NOSVE-NEXT: ldrb w11, [sp] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #4] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #3] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #2] +; NONEON-NOSVE-NEXT: orr w10, w11, w10 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #8] +; NONEON-NOSVE-NEXT: orr w11, w14, w13 +; NONEON-NOSVE-NEXT: orr w9, w12, w9 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #6] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #7] +; NONEON-NOSVE-NEXT: orr w10, w10, w11 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #13] +; NONEON-NOSVE-NEXT: orr w9, w9, w16 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #9] +; NONEON-NOSVE-NEXT: orr w12, w12, w15 +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #14] +; NONEON-NOSVE-NEXT: orr w8, w13, w8 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #15] +; NONEON-NOSVE-NEXT: orr w12, w12, w14 +; NONEON-NOSVE-NEXT: orr w8, w8, w11 +; NONEON-NOSVE-NEXT: orr w9, w10, w9 
+; NONEON-NOSVE-NEXT: orr w10, w12, w16 +; NONEON-NOSVE-NEXT: orr w8, w8, w15 +; NONEON-NOSVE-NEXT: orr w9, w9, w10 +; NONEON-NOSVE-NEXT: orr w8, w8, w13 +; NONEON-NOSVE-NEXT: orr w0, w9, w8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a) ret i8 %res } @@ -390,6 +1053,77 @@ define i8 @orv_v32i8(ptr %a) { ; CHECK-NEXT: orv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w11, [sp] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #2] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #3] +; NONEON-NOSVE-NEXT: orr w9, w11, w10 +; NONEON-NOSVE-NEXT: orr w10, w13, w12 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #5] +; NONEON-NOSVE-NEXT: orr w11, w15, w14 +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #4] +; NONEON-NOSVE-NEXT: orr w9, w10, w11 +; NONEON-NOSVE-NEXT: orr w10, w13, w12 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #6] +; NONEON-NOSVE-NEXT: orr w14, w17, w16 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #8] +; NONEON-NOSVE-NEXT: orr w10, w14, w10 +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #7] +; NONEON-NOSVE-NEXT: orr w11, w12, w11 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #24] +; NONEON-NOSVE-NEXT: orr w8, w8, w9 +; NONEON-NOSVE-NEXT: orr w9, w10, w11 +; NONEON-NOSVE-NEXT: orr w10, w14, w13 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #9] +; NONEON-NOSVE-NEXT: orr w8, w8, w9 +; NONEON-NOSVE-NEXT: 
orr w11, w15, w12 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #26] +; NONEON-NOSVE-NEXT: orr w9, w10, w11 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #10] +; NONEON-NOSVE-NEXT: orr w10, w13, w12 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #12] +; NONEON-NOSVE-NEXT: orr w9, w9, w10 +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w16, [sp, #14] +; NONEON-NOSVE-NEXT: orr w11, w12, w11 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w17, [sp, #15] +; NONEON-NOSVE-NEXT: orr w10, w13, w10 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #13] +; NONEON-NOSVE-NEXT: orr w14, w15, w14 +; NONEON-NOSVE-NEXT: orr w10, w11, w10 +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #30] +; NONEON-NOSVE-NEXT: orr w9, w9, w14 +; NONEON-NOSVE-NEXT: orr w12, w13, w12 +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #31] +; NONEON-NOSVE-NEXT: orr w8, w8, w9 +; NONEON-NOSVE-NEXT: orr w10, w10, w12 +; NONEON-NOSVE-NEXT: orr w11, w16, w11 +; NONEON-NOSVE-NEXT: orr w10, w10, w11 +; NONEON-NOSVE-NEXT: orr w11, w17, w13 +; NONEON-NOSVE-NEXT: orr w9, w10, w11 +; NONEON-NOSVE-NEXT: orr w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %op) ret i8 %res @@ -403,6 +1137,16 @@ define i16 @orv_v2i16(<2 x i16> %a) { ; CHECK-NEXT: orv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: orr w0, w9, w8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> %a) ret i16 %res } @@ -415,6 +1159,21 @@ define i16 @orv_v4i16(<4 x i16> %a) { ; CHECK-NEXT: orv h0, p0, z0.h ; CHECK-NEXT: 
fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #8] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: orr w10, w11, w10 +; NONEON-NOSVE-NEXT: orr w0, w10, w8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %a) ret i16 %res } @@ -427,6 +1186,28 @@ define i16 @orv_v8i16(<8 x i16> %a) { ; CHECK-NEXT: orv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w13, [sp] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: orr w12, w13, w12 +; NONEON-NOSVE-NEXT: orr w10, w11, w10 +; NONEON-NOSVE-NEXT: orr w10, w12, w10 +; NONEON-NOSVE-NEXT: orr w8, w8, w14 +; NONEON-NOSVE-NEXT: orr w8, w10, w8 +; NONEON-NOSVE-NEXT: orr w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %a) ret i16 %res } @@ -440,6 +1221,45 @@ define i16 @orv_v16i16(ptr %a) { ; CHECK-NEXT: orv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w11, [sp] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #4] +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #6] +; NONEON-NOSVE-NEXT: orr w9, w11, w10 +; NONEON-NOSVE-NEXT: orr w12, w13, w12 +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #10] +; NONEON-NOSVE-NEXT: orr w13, w15, w14 +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #24] +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #8] +; NONEON-NOSVE-NEXT: orr w9, w12, w13 +; NONEON-NOSVE-NEXT: orr w10, w11, w10 +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #12] +; NONEON-NOSVE-NEXT: orr w14, w17, w16 +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #14] +; NONEON-NOSVE-NEXT: orr w10, w14, w10 +; NONEON-NOSVE-NEXT: orr w11, w15, w11 +; NONEON-NOSVE-NEXT: orr w8, w8, w9 +; NONEON-NOSVE-NEXT: orr w9, w10, w11 +; NONEON-NOSVE-NEXT: orr w8, w8, w9 +; NONEON-NOSVE-NEXT: orr w9, w13, w12 +; NONEON-NOSVE-NEXT: orr w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %op) ret i16 %res @@ -453,6 +1273,16 @@ define i32 @orv_v2i32(<2 x i32> %a) { ; CHECK-NEXT: orv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: orr w0, w9, w8 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a) ret i32 %res } @@ 
-465,6 +1295,17 @@ define i32 @orv_v4i32(<4 x i32> %a) { ; CHECK-NEXT: orv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w11, w10, [sp], #16 +; NONEON-NOSVE-NEXT: orr w10, w11, w10 +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: orr w0, w10, w8 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a) ret i32 %res } @@ -478,6 +1319,25 @@ define i32 @orv_v8i32(ptr %a) { ; CHECK-NEXT: orv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w11, w10, [sp] +; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #24] +; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #8] +; NONEON-NOSVE-NEXT: orr w8, w10, w8 +; NONEON-NOSVE-NEXT: orr w9, w11, w9 +; NONEON-NOSVE-NEXT: orr w8, w9, w8 +; NONEON-NOSVE-NEXT: orr w10, w14, w12 +; NONEON-NOSVE-NEXT: orr w11, w15, w13 +; NONEON-NOSVE-NEXT: orr w9, w10, w11 +; NONEON-NOSVE-NEXT: orr w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %op) ret i32 %res @@ -491,6 +1351,14 @@ define i64 @orv_v2i64(<2 x i64> %a) { ; CHECK-NEXT: orv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp], #16 +; NONEON-NOSVE-NEXT: orr x0, x9, x8 +; NONEON-NOSVE-NEXT: ret %res = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a) ret i64 %res } @@ -504,6 +1372,18 @@ define i64 @orv_v4i64(ptr %a) { ; CHECK-NEXT: orv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp, #16] +; NONEON-NOSVE-NEXT: ldp x11, x10, [sp], #32 +; NONEON-NOSVE-NEXT: orr x8, x10, x8 +; NONEON-NOSVE-NEXT: orr x9, x11, x9 +; NONEON-NOSVE-NEXT: orr x0, x9, x8 +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %op) ret i64 %res diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll index 4f8f8c2e4b244..be335c697707d 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -19,6 +20,87 @@ define <4 x i8> @masked_load_v4i8(ptr %src, <4 x i1> %mask) { ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #128 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: str d0, [sp, #112] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #114] +; NONEON-NOSVE-NEXT: ldrh w9, 
[sp, #116] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #118] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #112] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: and w9, w9, #0x4 +; NONEON-NOSVE-NEXT: and w10, w10, #0x8 +; NONEON-NOSVE-NEXT: bfxil w8, w11, #0, #1 +; NONEON-NOSVE-NEXT: orr w9, w9, w10 +; NONEON-NOSVE-NEXT: orr w8, w8, w9 +; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB0_2 +; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load +; NONEON-NOSVE-NEXT: ldrb w9, [x0] +; NONEON-NOSVE-NEXT: strh wzr, [sp, #110] +; NONEON-NOSVE-NEXT: stur wzr, [sp, #106] +; NONEON-NOSVE-NEXT: strh w9, [sp, #104] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #104] +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB0_3 +; NONEON-NOSVE-NEXT: b .LBB0_4 +; NONEON-NOSVE-NEXT: .LBB0_2: +; NONEON-NOSVE-NEXT: adrp x9, .LCPI0_0 +; NONEON-NOSVE-NEXT: ldr d0, [x9, :lo12:.LCPI0_0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB0_4 +; NONEON-NOSVE-NEXT: .LBB0_3: // %cond.load1 +; NONEON-NOSVE-NEXT: ldrb w9, [x0, #1] +; NONEON-NOSVE-NEXT: str d0, [sp, #80] +; NONEON-NOSVE-NEXT: strh w9, [sp, #96] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #84] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #96] +; NONEON-NOSVE-NEXT: str w9, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #80] +; NONEON-NOSVE-NEXT: str d0, [sp, #72] +; NONEON-NOSVE-NEXT: strh w9, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #72] +; NONEON-NOSVE-NEXT: strh w9, [sp, #90] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #88] +; NONEON-NOSVE-NEXT: .LBB0_4: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB0_7 +; NONEON-NOSVE-NEXT: // %bb.5: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB0_8 +; NONEON-NOSVE-NEXT: .LBB0_6: // %else8 +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB0_7: // %cond.load4 +; NONEON-NOSVE-NEXT: ldrb w9, [x0, #2] +; NONEON-NOSVE-NEXT: str d0, [sp, #48] +; NONEON-NOSVE-NEXT: strh w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldrh w9, 
[sp, #54] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #64] +; NONEON-NOSVE-NEXT: strh w9, [sp, #62] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #48] +; NONEON-NOSVE-NEXT: str d0, [sp, #40] +; NONEON-NOSVE-NEXT: str w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w9, [sp, #60] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #56] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB0_6 +; NONEON-NOSVE-NEXT: .LBB0_8: // %cond.load7 +; NONEON-NOSVE-NEXT: ldrb w8, [x0, #3] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: str d0, [sp, #16] +; NONEON-NOSVE-NEXT: str w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %load = call <4 x i8> @llvm.masked.load.v4i8(ptr %src, i32 8, <4 x i1> %mask, <4 x i8> zeroinitializer) ret <4 x i8> %load } @@ -34,6 +116,186 @@ define <8 x i8> @masked_load_v8i8(ptr %src, <8 x i1> %mask) { ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #272 +; NONEON-NOSVE-NEXT: str x29, [sp, #256] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 272 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: str d0, [sp, #240] +; NONEON-NOSVE-NEXT: add x9, sp, #176 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #242] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #243] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #241] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #244] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #245] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #246] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: 
sbfx w11, w11, #0, #1 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #240] +; NONEON-NOSVE-NEXT: sbfx w13, w13, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w14, w14, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x4 +; NONEON-NOSVE-NEXT: and w10, w10, #0x8 +; NONEON-NOSVE-NEXT: sbfx w15, w15, #0, #1 +; NONEON-NOSVE-NEXT: orr w8, w8, w10 +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #247] +; NONEON-NOSVE-NEXT: and w11, w11, #0x2 +; NONEON-NOSVE-NEXT: and w13, w13, #0x10 +; NONEON-NOSVE-NEXT: bfxil w11, w12, #0, #1 +; NONEON-NOSVE-NEXT: and w12, w14, #0x20 +; NONEON-NOSVE-NEXT: orr w8, w8, w13 +; NONEON-NOSVE-NEXT: and w13, w15, #0x40 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: orr w8, w11, w8 +; NONEON-NOSVE-NEXT: orr w11, w12, w13 +; NONEON-NOSVE-NEXT: orr w8, w8, w11 +; NONEON-NOSVE-NEXT: and w10, w10, #0x80 +; NONEON-NOSVE-NEXT: add w10, w8, w10 +; NONEON-NOSVE-NEXT: and w8, w10, #0xff +; NONEON-NOSVE-NEXT: tbz w10, #0, .LBB1_2 +; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load +; NONEON-NOSVE-NEXT: ldrb w10, [x0] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #239] +; NONEON-NOSVE-NEXT: sturh wzr, [x9, #61] +; NONEON-NOSVE-NEXT: stur wzr, [x9, #57] +; NONEON-NOSVE-NEXT: strb w10, [sp, #232] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #232] +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB1_3 +; NONEON-NOSVE-NEXT: b .LBB1_4 +; NONEON-NOSVE-NEXT: .LBB1_2: +; NONEON-NOSVE-NEXT: adrp x10, .LCPI1_0 +; NONEON-NOSVE-NEXT: ldr d0, [x10, :lo12:.LCPI1_0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB1_4 +; NONEON-NOSVE-NEXT: .LBB1_3: // %cond.load1 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #1] +; NONEON-NOSVE-NEXT: str d0, [sp, #208] +; NONEON-NOSVE-NEXT: strb w10, [sp, #224] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #214] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #224] +; NONEON-NOSVE-NEXT: strh w10, [sp, #222] +; NONEON-NOSVE-NEXT: str d0, [sp, #200] +; NONEON-NOSVE-NEXT: ldur w10, [x9, #34] +; NONEON-NOSVE-NEXT: stur w10, [x9, #42] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #208] +; NONEON-NOSVE-NEXT: strb w10, [sp, #216] +; NONEON-NOSVE-NEXT: 
ldrb w10, [sp, #200] +; NONEON-NOSVE-NEXT: strb w10, [sp, #217] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #216] +; NONEON-NOSVE-NEXT: .LBB1_4: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB1_12 +; NONEON-NOSVE-NEXT: // %bb.5: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB1_13 +; NONEON-NOSVE-NEXT: .LBB1_6: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB1_14 +; NONEON-NOSVE-NEXT: .LBB1_7: // %else11 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB1_15 +; NONEON-NOSVE-NEXT: .LBB1_8: // %else14 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB1_16 +; NONEON-NOSVE-NEXT: .LBB1_9: // %else17 +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB1_11 +; NONEON-NOSVE-NEXT: .LBB1_10: // %cond.load19 +; NONEON-NOSVE-NEXT: ldrb w8, [x0, #7] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: str d0, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: str w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: .LBB1_11: // %else20 +; NONEON-NOSVE-NEXT: ldr x29, [sp, #256] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: add sp, sp, #272 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB1_12: // %cond.load4 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #2] +; NONEON-NOSVE-NEXT: str d0, [sp, #176] +; NONEON-NOSVE-NEXT: strb w10, [sp, #192] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #183] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #192] +; NONEON-NOSVE-NEXT: strb w10, [sp, #191] +; NONEON-NOSVE-NEXT: str d0, [sp, #168] +; NONEON-NOSVE-NEXT: ldur w10, [x9, #3] +; NONEON-NOSVE-NEXT: stur w10, [x9, #11] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #176] +; NONEON-NOSVE-NEXT: strh w9, [sp, #184] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #168] +; NONEON-NOSVE-NEXT: strb w9, [sp, #186] +; 
NONEON-NOSVE-NEXT: ldr d0, [sp, #184] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB1_6 +; NONEON-NOSVE-NEXT: .LBB1_13: // %cond.load7 +; NONEON-NOSVE-NEXT: ldrb w9, [x0, #3] +; NONEON-NOSVE-NEXT: str d0, [sp, #144] +; NONEON-NOSVE-NEXT: strb w9, [sp, #160] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #148] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #160] +; NONEON-NOSVE-NEXT: str w9, [sp, #156] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #146] +; NONEON-NOSVE-NEXT: str d0, [sp, #136] +; NONEON-NOSVE-NEXT: strb w9, [sp, #154] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #144] +; NONEON-NOSVE-NEXT: strh w9, [sp, #152] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #136] +; NONEON-NOSVE-NEXT: strb w9, [sp, #155] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #152] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB1_7 +; NONEON-NOSVE-NEXT: .LBB1_14: // %cond.load10 +; NONEON-NOSVE-NEXT: ldrb w9, [x0, #4] +; NONEON-NOSVE-NEXT: str d0, [sp, #112] +; NONEON-NOSVE-NEXT: strb w9, [sp, #128] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #119] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #128] +; NONEON-NOSVE-NEXT: strb w9, [sp, #127] +; NONEON-NOSVE-NEXT: ldurh w9, [sp, #117] +; NONEON-NOSVE-NEXT: str d0, [sp, #104] +; NONEON-NOSVE-NEXT: sturh w9, [sp, #125] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #112] +; NONEON-NOSVE-NEXT: str w9, [sp, #120] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #104] +; NONEON-NOSVE-NEXT: strb w9, [sp, #124] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #120] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB1_8 +; NONEON-NOSVE-NEXT: .LBB1_15: // %cond.load13 +; NONEON-NOSVE-NEXT: ldrb w9, [x0, #5] +; NONEON-NOSVE-NEXT: str d0, [sp, #80] +; NONEON-NOSVE-NEXT: strb w9, [sp, #96] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #86] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #96] +; NONEON-NOSVE-NEXT: strh w9, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #84] +; NONEON-NOSVE-NEXT: str d0, [sp, #72] +; NONEON-NOSVE-NEXT: strb w9, [sp, #92] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #80] +; NONEON-NOSVE-NEXT: str w9, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #72] +; NONEON-NOSVE-NEXT: strb w9, [sp, #93] +; 
NONEON-NOSVE-NEXT: ldr d0, [sp, #88] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB1_9 +; NONEON-NOSVE-NEXT: .LBB1_16: // %cond.load16 +; NONEON-NOSVE-NEXT: ldrb w9, [x0, #6] +; NONEON-NOSVE-NEXT: str d0, [sp, #48] +; NONEON-NOSVE-NEXT: strb w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #55] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #64] +; NONEON-NOSVE-NEXT: strb w9, [sp, #63] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #52] +; NONEON-NOSVE-NEXT: str d0, [sp, #40] +; NONEON-NOSVE-NEXT: strh w9, [sp, #60] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #48] +; NONEON-NOSVE-NEXT: str w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w9, [sp, #62] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #56] +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB1_10 +; NONEON-NOSVE-NEXT: b .LBB1_11 %load = call <8 x i8> @llvm.masked.load.v8i8(ptr %src, i32 8, <8 x i1> %mask, <8 x i8> zeroinitializer) ret <8 x i8> %load } @@ -49,6 +311,416 @@ define <16 x i8> @masked_load_v16i8(ptr %src, <16 x i1> %mask) { ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; NONEON-NOSVE-NEXT: sub sp, sp, #1024 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 1040 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: str q0, [sp, #976] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #984] +; NONEON-NOSVE-NEXT: and w8, w8, #0x1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #1000] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #976] +; NONEON-NOSVE-NEXT: and w8, w8, #0x1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #992] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #991] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x80 +; NONEON-NOSVE-NEXT: strb w8, [sp, #1007] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #990] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x40 +; NONEON-NOSVE-NEXT: strb w8, [sp, #1006] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #989] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x20 +; NONEON-NOSVE-NEXT: strb w8, [sp, #1005] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #988] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x10 +; NONEON-NOSVE-NEXT: strb w8, [sp, #1004] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #987] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #1003] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #986] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x4 +; NONEON-NOSVE-NEXT: strb w8, [sp, #1002] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #985] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: strb w8, [sp, #1001] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #983] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x80 +; NONEON-NOSVE-NEXT: strb w8, [sp, #999] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #982] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x40 +; NONEON-NOSVE-NEXT: strb w8, [sp, #998] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #981] +; 
NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x20 +; NONEON-NOSVE-NEXT: strb w8, [sp, #997] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #980] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x10 +; NONEON-NOSVE-NEXT: strb w8, [sp, #996] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #979] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #995] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #978] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x4 +; NONEON-NOSVE-NEXT: strb w8, [sp, #994] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #977] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: strb w8, [sp, #993] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #992] +; NONEON-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: str q0, [sp, #1008] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #1010] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #1008] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #1012] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #1014] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #1016] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #1018] +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #1020] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: add w9, w10, w11 +; NONEON-NOSVE-NEXT: add w10, w12, w13 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: add w9, w10, w14 +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #1022] +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: add x9, sp, #720 +; NONEON-NOSVE-NEXT: add w8, w8, w10 +; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB2_2 +; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load +; NONEON-NOSVE-NEXT: ldrb w10, [x0] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #975] +; NONEON-NOSVE-NEXT: sturh wzr, [x9, #253] +; NONEON-NOSVE-NEXT: stur wzr, [x9, #249] +; NONEON-NOSVE-NEXT: stur xzr, [x9, #241] +; NONEON-NOSVE-NEXT: strb w10, [sp, #960] +; NONEON-NOSVE-NEXT: ldr q0, [sp, 
#960] +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB2_3 +; NONEON-NOSVE-NEXT: b .LBB2_4 +; NONEON-NOSVE-NEXT: .LBB2_2: +; NONEON-NOSVE-NEXT: adrp x10, .LCPI2_0 +; NONEON-NOSVE-NEXT: ldr q0, [x10, :lo12:.LCPI2_0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB2_4 +; NONEON-NOSVE-NEXT: .LBB2_3: // %cond.load1 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #1] +; NONEON-NOSVE-NEXT: str q0, [sp, #912] +; NONEON-NOSVE-NEXT: strb w10, [sp, #944] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #926] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #944] +; NONEON-NOSVE-NEXT: strh w10, [sp, #942] +; NONEON-NOSVE-NEXT: str q0, [sp, #896] +; NONEON-NOSVE-NEXT: ldur w10, [x9, #202] +; NONEON-NOSVE-NEXT: ldur x11, [x9, #194] +; NONEON-NOSVE-NEXT: stur w10, [x9, #218] +; NONEON-NOSVE-NEXT: stur x11, [x9, #210] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #912] +; NONEON-NOSVE-NEXT: strb w10, [sp, #928] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #896] +; NONEON-NOSVE-NEXT: strb w10, [sp, #929] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #928] +; NONEON-NOSVE-NEXT: .LBB2_4: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB2_20 +; NONEON-NOSVE-NEXT: // %bb.5: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB2_21 +; NONEON-NOSVE-NEXT: .LBB2_6: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB2_22 +; NONEON-NOSVE-NEXT: .LBB2_7: // %else11 +; NONEON-NOSVE-NEXT: add x9, sp, #464 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB2_23 +; NONEON-NOSVE-NEXT: .LBB2_8: // %else14 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB2_24 +; NONEON-NOSVE-NEXT: .LBB2_9: // %else17 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB2_25 +; NONEON-NOSVE-NEXT: .LBB2_10: // %else20 +; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB2_26 +; NONEON-NOSVE-NEXT: .LBB2_11: // %else23 +; NONEON-NOSVE-NEXT: add x9, sp, #208 +; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB2_27 +; NONEON-NOSVE-NEXT: .LBB2_12: // %else26 +; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB2_28 +; NONEON-NOSVE-NEXT: .LBB2_13: // %else29 +; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB2_29 +; NONEON-NOSVE-NEXT: .LBB2_14: // %else32 +; NONEON-NOSVE-NEXT: tbnz w8, #12, 
.LBB2_30 +; NONEON-NOSVE-NEXT: .LBB2_15: // %else35 +; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB2_31 +; NONEON-NOSVE-NEXT: .LBB2_16: // %else38 +; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB2_32 +; NONEON-NOSVE-NEXT: .LBB2_17: // %else41 +; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB2_19 +; NONEON-NOSVE-NEXT: .LBB2_18: // %cond.load43 +; NONEON-NOSVE-NEXT: ldrb w8, [x0, #15] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: .LBB2_19: // %else44 +; NONEON-NOSVE-NEXT: add sp, sp, #1024 +; NONEON-NOSVE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB2_20: // %cond.load4 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #2] +; NONEON-NOSVE-NEXT: str q0, [sp, #848] +; NONEON-NOSVE-NEXT: strb w10, [sp, #880] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #863] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #880] +; NONEON-NOSVE-NEXT: strb w10, [sp, #879] +; NONEON-NOSVE-NEXT: str q0, [sp, #832] +; NONEON-NOSVE-NEXT: ldur w10, [x9, #139] +; NONEON-NOSVE-NEXT: ldur x11, [x9, #131] +; NONEON-NOSVE-NEXT: stur w10, [x9, #155] +; NONEON-NOSVE-NEXT: stur x11, [x9, #147] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #848] +; NONEON-NOSVE-NEXT: strh w10, [sp, #864] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #832] +; NONEON-NOSVE-NEXT: strb w10, [sp, #866] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #864] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB2_6 +; NONEON-NOSVE-NEXT: .LBB2_21: // %cond.load7 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #3] +; 
NONEON-NOSVE-NEXT: str q0, [sp, #784] +; NONEON-NOSVE-NEXT: strb w10, [sp, #816] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #796] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #816] +; NONEON-NOSVE-NEXT: str w10, [sp, #812] +; NONEON-NOSVE-NEXT: str q0, [sp, #768] +; NONEON-NOSVE-NEXT: ldur x10, [x9, #68] +; NONEON-NOSVE-NEXT: stur x10, [x9, #84] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #786] +; NONEON-NOSVE-NEXT: strb w10, [sp, #802] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #784] +; NONEON-NOSVE-NEXT: strh w10, [sp, #800] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #768] +; NONEON-NOSVE-NEXT: strb w10, [sp, #803] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #800] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB2_7 +; NONEON-NOSVE-NEXT: .LBB2_22: // %cond.load10 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #4] +; NONEON-NOSVE-NEXT: str q0, [sp, #720] +; NONEON-NOSVE-NEXT: strb w10, [sp, #752] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #735] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #752] +; NONEON-NOSVE-NEXT: strb w10, [sp, #751] +; NONEON-NOSVE-NEXT: str q0, [sp, #704] +; NONEON-NOSVE-NEXT: ldurh w10, [x9, #13] +; NONEON-NOSVE-NEXT: ldur x11, [x9, #5] +; NONEON-NOSVE-NEXT: sturh w10, [x9, #29] +; NONEON-NOSVE-NEXT: stur x11, [x9, #21] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #720] +; NONEON-NOSVE-NEXT: str w9, [sp, #736] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #704] +; NONEON-NOSVE-NEXT: strb w9, [sp, #740] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #736] +; NONEON-NOSVE-NEXT: add x9, sp, #464 +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB2_8 +; NONEON-NOSVE-NEXT: .LBB2_23: // %cond.load13 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #5] +; NONEON-NOSVE-NEXT: str q0, [sp, #656] +; NONEON-NOSVE-NEXT: strb w10, [sp, #688] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #670] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #688] +; NONEON-NOSVE-NEXT: strh w10, [sp, #686] +; NONEON-NOSVE-NEXT: str q0, [sp, #640] +; NONEON-NOSVE-NEXT: ldur x10, [x9, #198] +; NONEON-NOSVE-NEXT: stur x10, [x9, #214] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #660] +; NONEON-NOSVE-NEXT: strb w10, [sp, #676] +; 
NONEON-NOSVE-NEXT: ldr w10, [sp, #656] +; NONEON-NOSVE-NEXT: str w10, [sp, #672] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #640] +; NONEON-NOSVE-NEXT: strb w10, [sp, #677] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #672] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB2_9 +; NONEON-NOSVE-NEXT: .LBB2_24: // %cond.load16 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #6] +; NONEON-NOSVE-NEXT: str q0, [sp, #592] +; NONEON-NOSVE-NEXT: strb w10, [sp, #624] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #607] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #624] +; NONEON-NOSVE-NEXT: strb w10, [sp, #623] +; NONEON-NOSVE-NEXT: str q0, [sp, #576] +; NONEON-NOSVE-NEXT: ldur x10, [x9, #135] +; NONEON-NOSVE-NEXT: stur x10, [x9, #151] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #596] +; NONEON-NOSVE-NEXT: strh w10, [sp, #612] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #592] +; NONEON-NOSVE-NEXT: str w10, [sp, #608] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #576] +; NONEON-NOSVE-NEXT: strb w10, [sp, #614] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #608] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB2_10 +; NONEON-NOSVE-NEXT: .LBB2_25: // %cond.load19 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #7] +; NONEON-NOSVE-NEXT: str q0, [sp, #528] +; NONEON-NOSVE-NEXT: strb w10, [sp, #560] +; NONEON-NOSVE-NEXT: ldr x10, [sp, #536] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #560] +; NONEON-NOSVE-NEXT: str x10, [sp, #552] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #534] +; NONEON-NOSVE-NEXT: str q0, [sp, #512] +; NONEON-NOSVE-NEXT: strb w10, [sp, #550] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #532] +; NONEON-NOSVE-NEXT: strh w10, [sp, #548] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #528] +; NONEON-NOSVE-NEXT: str w10, [sp, #544] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #512] +; NONEON-NOSVE-NEXT: strb w10, [sp, #551] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #544] +; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB2_11 +; NONEON-NOSVE-NEXT: .LBB2_26: // %cond.load22 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #8] +; NONEON-NOSVE-NEXT: str q0, [sp, #464] +; NONEON-NOSVE-NEXT: strb w10, [sp, #496] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #479] 
+; NONEON-NOSVE-NEXT: ldr q0, [sp, #496] +; NONEON-NOSVE-NEXT: strb w10, [sp, #495] +; NONEON-NOSVE-NEXT: str q0, [sp, #448] +; NONEON-NOSVE-NEXT: ldurh w10, [x9, #13] +; NONEON-NOSVE-NEXT: ldur w11, [x9, #9] +; NONEON-NOSVE-NEXT: sturh w10, [x9, #29] +; NONEON-NOSVE-NEXT: stur w11, [x9, #25] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #464] +; NONEON-NOSVE-NEXT: str x9, [sp, #480] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #448] +; NONEON-NOSVE-NEXT: strb w9, [sp, #488] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #480] +; NONEON-NOSVE-NEXT: add x9, sp, #208 +; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB2_12 +; NONEON-NOSVE-NEXT: .LBB2_27: // %cond.load25 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #9] +; NONEON-NOSVE-NEXT: str q0, [sp, #400] +; NONEON-NOSVE-NEXT: strb w10, [sp, #432] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #414] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #432] +; NONEON-NOSVE-NEXT: strh w10, [sp, #430] +; NONEON-NOSVE-NEXT: str q0, [sp, #384] +; NONEON-NOSVE-NEXT: ldur w10, [x9, #202] +; NONEON-NOSVE-NEXT: stur w10, [x9, #218] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #408] +; NONEON-NOSVE-NEXT: strb w10, [sp, #424] +; NONEON-NOSVE-NEXT: ldr x10, [sp, #400] +; NONEON-NOSVE-NEXT: str x10, [sp, #416] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #384] +; NONEON-NOSVE-NEXT: strb w10, [sp, #425] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #416] +; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB2_13 +; NONEON-NOSVE-NEXT: .LBB2_28: // %cond.load28 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #10] +; NONEON-NOSVE-NEXT: str q0, [sp, #336] +; NONEON-NOSVE-NEXT: strb w10, [sp, #368] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #351] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #368] +; NONEON-NOSVE-NEXT: strb w10, [sp, #367] +; NONEON-NOSVE-NEXT: str q0, [sp, #320] +; NONEON-NOSVE-NEXT: ldur w10, [x9, #139] +; NONEON-NOSVE-NEXT: stur w10, [x9, #155] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #344] +; NONEON-NOSVE-NEXT: strh w10, [sp, #360] +; NONEON-NOSVE-NEXT: ldr x10, [sp, #336] +; NONEON-NOSVE-NEXT: str x10, [sp, #352] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #320] +; 
NONEON-NOSVE-NEXT: strb w10, [sp, #362] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #352] +; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB2_14 +; NONEON-NOSVE-NEXT: .LBB2_29: // %cond.load31 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #11] +; NONEON-NOSVE-NEXT: str q0, [sp, #272] +; NONEON-NOSVE-NEXT: strb w10, [sp, #304] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #284] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #304] +; NONEON-NOSVE-NEXT: str w10, [sp, #300] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #282] +; NONEON-NOSVE-NEXT: str q0, [sp, #256] +; NONEON-NOSVE-NEXT: strb w10, [sp, #298] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #280] +; NONEON-NOSVE-NEXT: strh w10, [sp, #296] +; NONEON-NOSVE-NEXT: ldr x10, [sp, #272] +; NONEON-NOSVE-NEXT: str x10, [sp, #288] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #256] +; NONEON-NOSVE-NEXT: strb w10, [sp, #299] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #288] +; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB2_15 +; NONEON-NOSVE-NEXT: .LBB2_30: // %cond.load34 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #12] +; NONEON-NOSVE-NEXT: str q0, [sp, #208] +; NONEON-NOSVE-NEXT: strb w10, [sp, #240] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #223] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #240] +; NONEON-NOSVE-NEXT: strb w10, [sp, #239] +; NONEON-NOSVE-NEXT: str q0, [sp, #192] +; NONEON-NOSVE-NEXT: ldurh w10, [x9, #13] +; NONEON-NOSVE-NEXT: sturh w10, [x9, #29] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #216] +; NONEON-NOSVE-NEXT: str w9, [sp, #232] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #208] +; NONEON-NOSVE-NEXT: str x9, [sp, #224] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #192] +; NONEON-NOSVE-NEXT: strb w9, [sp, #236] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #224] +; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB2_16 +; NONEON-NOSVE-NEXT: .LBB2_31: // %cond.load37 +; NONEON-NOSVE-NEXT: ldrb w9, [x0, #13] +; NONEON-NOSVE-NEXT: str q0, [sp, #144] +; NONEON-NOSVE-NEXT: strb w9, [sp, #176] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #158] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #176] +; NONEON-NOSVE-NEXT: strh w9, [sp, #174] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #156] +; 
NONEON-NOSVE-NEXT: str q0, [sp, #128] +; NONEON-NOSVE-NEXT: strb w9, [sp, #172] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #152] +; NONEON-NOSVE-NEXT: str w9, [sp, #168] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #144] +; NONEON-NOSVE-NEXT: str x9, [sp, #160] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #128] +; NONEON-NOSVE-NEXT: strb w9, [sp, #173] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #160] +; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB2_17 +; NONEON-NOSVE-NEXT: .LBB2_32: // %cond.load40 +; NONEON-NOSVE-NEXT: ldrb w9, [x0, #14] +; NONEON-NOSVE-NEXT: str q0, [sp, #80] +; NONEON-NOSVE-NEXT: strb w9, [sp, #112] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #95] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #112] +; NONEON-NOSVE-NEXT: strb w9, [sp, #111] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #92] +; NONEON-NOSVE-NEXT: str q0, [sp, #64] +; NONEON-NOSVE-NEXT: strh w9, [sp, #108] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #88] +; NONEON-NOSVE-NEXT: str w9, [sp, #104] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #80] +; NONEON-NOSVE-NEXT: str x9, [sp, #96] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #64] +; NONEON-NOSVE-NEXT: strb w9, [sp, #110] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #96] +; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB2_18 +; NONEON-NOSVE-NEXT: b .LBB2_19 %load = call <16 x i8> @llvm.masked.load.v16i8(ptr %src, i32 8, <16 x i1> %mask, <16 x i8> zeroinitializer) ret <16 x i8> %load } @@ -130,6 +802,818 @@ define <32 x i8> @masked_load_v32i8(ptr %src, <32 x i1> %mask) { ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; NONEON-NOSVE-NEXT: sub sp, sp, #2064 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 2080 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #2216] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #2152] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #2272] +; NONEON-NOSVE-NEXT: ldr w11, [sp, #2176] +; NONEON-NOSVE-NEXT: ldr w12, [sp, #2160] +; NONEON-NOSVE-NEXT: and w8, w8, #0x1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #2024] +; NONEON-NOSVE-NEXT: and w8, w9, #0x1 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #2264] +; NONEON-NOSVE-NEXT: strb w8, [sp, #2016] +; NONEON-NOSVE-NEXT: sbfx w11, w11, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w9, #0x80 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #2256] +; NONEON-NOSVE-NEXT: strb w8, [sp, #2031] +; NONEON-NOSVE-NEXT: and w8, w9, #0x40 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #2248] +; NONEON-NOSVE-NEXT: strb w8, [sp, #2030] +; NONEON-NOSVE-NEXT: and w8, w9, #0x20 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #2240] +; NONEON-NOSVE-NEXT: strb w8, [sp, #2029] +; NONEON-NOSVE-NEXT: and w8, w9, #0x10 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #2232] +; NONEON-NOSVE-NEXT: strb w8, [sp, #2028] +; NONEON-NOSVE-NEXT: and w8, w9, #0x8 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #2224] +; NONEON-NOSVE-NEXT: strb w8, [sp, #2027] +; NONEON-NOSVE-NEXT: and w8, w9, #0x4 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #2208] +; NONEON-NOSVE-NEXT: strb w8, [sp, #2026] +; NONEON-NOSVE-NEXT: and w8, w9, #0x2 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #2200] +; NONEON-NOSVE-NEXT: strb w8, [sp, #2025] +; NONEON-NOSVE-NEXT: and w8, w9, #0x80 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #2192] +; NONEON-NOSVE-NEXT: strb w8, 
[sp, #2023] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #2184] +; NONEON-NOSVE-NEXT: and w9, w9, #0x40 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: strb w9, [sp, #2022] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #2168] +; NONEON-NOSVE-NEXT: and w10, w10, #0x20 +; NONEON-NOSVE-NEXT: and w8, w8, #0x10 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: strb w10, [sp, #2021] +; NONEON-NOSVE-NEXT: strb w8, [sp, #2020] +; NONEON-NOSVE-NEXT: and w8, w11, #0x8 +; NONEON-NOSVE-NEXT: sbfx w10, w12, #0, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #2019] +; NONEON-NOSVE-NEXT: and w8, w9, #0x4 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #2088] +; NONEON-NOSVE-NEXT: strb w8, [sp, #2018] +; NONEON-NOSVE-NEXT: and w8, w10, #0x2 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #2136] +; NONEON-NOSVE-NEXT: strb w8, [sp, #2017] +; NONEON-NOSVE-NEXT: and w8, w9, #0x1 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #2144] +; NONEON-NOSVE-NEXT: strb w8, [sp, #2008] +; NONEON-NOSVE-NEXT: and w8, w1, #0x1 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #2104] +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #2000] +; NONEON-NOSVE-NEXT: ldr w12, [sp, #2080] +; NONEON-NOSVE-NEXT: sbfx w11, w11, #0, #1 +; NONEON-NOSVE-NEXT: ldr q0, [sp, #2016] +; NONEON-NOSVE-NEXT: and w8, w9, #0x80 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #2128] +; NONEON-NOSVE-NEXT: strb w8, [sp, #2015] +; NONEON-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; NONEON-NOSVE-NEXT: and w8, w9, #0x40 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #2120] +; NONEON-NOSVE-NEXT: strb w8, [sp, #2014] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #2112] +; NONEON-NOSVE-NEXT: and w9, w9, #0x20 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: strb w9, [sp, #2013] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #2096] +; NONEON-NOSVE-NEXT: and w10, w10, #0x10 +; NONEON-NOSVE-NEXT: zip1 v0.16b, 
v0.16b, v1.16b +; NONEON-NOSVE-NEXT: and w8, w8, #0x8 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: strb w10, [sp, #2012] +; NONEON-NOSVE-NEXT: strb w8, [sp, #2011] +; NONEON-NOSVE-NEXT: and w8, w11, #0x4 +; NONEON-NOSVE-NEXT: sbfx w10, w12, #0, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #2010] +; NONEON-NOSVE-NEXT: and w8, w9, #0x2 +; NONEON-NOSVE-NEXT: sbfx w9, w7, #0, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #2009] +; NONEON-NOSVE-NEXT: and w8, w10, #0x80 +; NONEON-NOSVE-NEXT: sbfx w10, w6, #0, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #2007] +; NONEON-NOSVE-NEXT: and w8, w9, #0x40 +; NONEON-NOSVE-NEXT: sbfx w9, w5, #0, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #2006] +; NONEON-NOSVE-NEXT: and w8, w10, #0x20 +; NONEON-NOSVE-NEXT: sbfx w10, w4, #0, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #2005] +; NONEON-NOSVE-NEXT: and w8, w9, #0x10 +; NONEON-NOSVE-NEXT: sbfx w9, w3, #0, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #2004] +; NONEON-NOSVE-NEXT: and w8, w10, #0x8 +; NONEON-NOSVE-NEXT: sbfx w10, w2, #0, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #2003] +; NONEON-NOSVE-NEXT: and w8, w9, #0x4 +; NONEON-NOSVE-NEXT: strb w8, [sp, #2002] +; NONEON-NOSVE-NEXT: and w8, w10, #0x2 +; NONEON-NOSVE-NEXT: strb w8, [sp, #2001] +; NONEON-NOSVE-NEXT: str q0, [sp, #2048] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #2000] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2050] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2048] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #2052] +; NONEON-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #2054] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #2056] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #2058] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: add w9, w10, w11 +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #2060] +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: add w9, w12, w13 +; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: add w9, w9, w10 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: str q0, [sp, #2032] +; 
NONEON-NOSVE-NEXT: ldrh w11, [sp, #2034] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #2032] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #2036] +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #2038] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #2040] +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #2042] +; NONEON-NOSVE-NEXT: add w10, w12, w11 +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #2044] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #2062] +; NONEON-NOSVE-NEXT: add w13, w13, w14 +; NONEON-NOSVE-NEXT: add w14, w15, w16 +; NONEON-NOSVE-NEXT: add w10, w10, w13 +; NONEON-NOSVE-NEXT: add w11, w14, w11 +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #2046] +; NONEON-NOSVE-NEXT: add w9, w10, w11 +; NONEON-NOSVE-NEXT: add w10, w8, w12 +; NONEON-NOSVE-NEXT: add w8, w9, w13 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI3_0 +; NONEON-NOSVE-NEXT: bfi w8, w10, #16, #16 +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI3_0] +; NONEON-NOSVE-NEXT: add x9, sp, #1744 +; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB3_2 +; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load +; NONEON-NOSVE-NEXT: ldrb w10, [x0] +; NONEON-NOSVE-NEXT: strb wzr, [sp, #1999] +; NONEON-NOSVE-NEXT: sturh wzr, [x9, #253] +; NONEON-NOSVE-NEXT: stur wzr, [x9, #249] +; NONEON-NOSVE-NEXT: stur xzr, [x9, #241] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1984] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1984] +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB3_3 +; NONEON-NOSVE-NEXT: b .LBB3_4 +; NONEON-NOSVE-NEXT: .LBB3_2: +; NONEON-NOSVE-NEXT: mov v0.16b, v1.16b +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB3_4 +; NONEON-NOSVE-NEXT: .LBB3_3: // %cond.load1 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #1] +; NONEON-NOSVE-NEXT: str q0, [sp, #1936] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1968] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #1950] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1968] +; NONEON-NOSVE-NEXT: strh w10, [sp, #1966] +; NONEON-NOSVE-NEXT: str q0, [sp, #1920] +; NONEON-NOSVE-NEXT: ldur w10, [x9, #202] +; NONEON-NOSVE-NEXT: ldur x11, [x9, #194] +; NONEON-NOSVE-NEXT: stur w10, [x9, #218] +; NONEON-NOSVE-NEXT: stur x11, [x9, #210] +; NONEON-NOSVE-NEXT: 
ldrb w10, [sp, #1936] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1952] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1920] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1953] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1952] +; NONEON-NOSVE-NEXT: .LBB3_4: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB3_36 +; NONEON-NOSVE-NEXT: // %bb.5: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB3_37 +; NONEON-NOSVE-NEXT: .LBB3_6: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB3_38 +; NONEON-NOSVE-NEXT: .LBB3_7: // %else11 +; NONEON-NOSVE-NEXT: add x9, sp, #1488 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB3_39 +; NONEON-NOSVE-NEXT: .LBB3_8: // %else14 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB3_40 +; NONEON-NOSVE-NEXT: .LBB3_9: // %else17 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB3_41 +; NONEON-NOSVE-NEXT: .LBB3_10: // %else20 +; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB3_42 +; NONEON-NOSVE-NEXT: .LBB3_11: // %else23 +; NONEON-NOSVE-NEXT: add x9, sp, #1232 +; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB3_43 +; NONEON-NOSVE-NEXT: .LBB3_12: // %else26 +; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB3_44 +; NONEON-NOSVE-NEXT: .LBB3_13: // %else29 +; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB3_45 +; NONEON-NOSVE-NEXT: .LBB3_14: // %else32 +; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB3_46 +; NONEON-NOSVE-NEXT: .LBB3_15: // %else35 +; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB3_47 +; NONEON-NOSVE-NEXT: .LBB3_16: // %else38 +; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB3_48 +; NONEON-NOSVE-NEXT: .LBB3_17: // %else41 +; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB3_49 +; NONEON-NOSVE-NEXT: .LBB3_18: // %else44 +; NONEON-NOSVE-NEXT: tbnz w8, #16, .LBB3_50 +; NONEON-NOSVE-NEXT: .LBB3_19: // %else47 +; NONEON-NOSVE-NEXT: add x9, sp, #720 +; NONEON-NOSVE-NEXT: tbnz w8, #17, .LBB3_51 +; NONEON-NOSVE-NEXT: .LBB3_20: // %else50 +; NONEON-NOSVE-NEXT: tbnz w8, #18, .LBB3_52 +; NONEON-NOSVE-NEXT: .LBB3_21: // %else53 +; NONEON-NOSVE-NEXT: tbnz w8, #19, .LBB3_53 +; NONEON-NOSVE-NEXT: .LBB3_22: // %else56 +; NONEON-NOSVE-NEXT: tbnz w8, #20, .LBB3_54 +; NONEON-NOSVE-NEXT: 
.LBB3_23: // %else59 +; NONEON-NOSVE-NEXT: add x9, sp, #464 +; NONEON-NOSVE-NEXT: tbnz w8, #21, .LBB3_55 +; NONEON-NOSVE-NEXT: .LBB3_24: // %else62 +; NONEON-NOSVE-NEXT: tbnz w8, #22, .LBB3_56 +; NONEON-NOSVE-NEXT: .LBB3_25: // %else65 +; NONEON-NOSVE-NEXT: tbnz w8, #23, .LBB3_57 +; NONEON-NOSVE-NEXT: .LBB3_26: // %else68 +; NONEON-NOSVE-NEXT: tbnz w8, #24, .LBB3_58 +; NONEON-NOSVE-NEXT: .LBB3_27: // %else71 +; NONEON-NOSVE-NEXT: add x9, sp, #208 +; NONEON-NOSVE-NEXT: tbnz w8, #25, .LBB3_59 +; NONEON-NOSVE-NEXT: .LBB3_28: // %else74 +; NONEON-NOSVE-NEXT: tbnz w8, #26, .LBB3_60 +; NONEON-NOSVE-NEXT: .LBB3_29: // %else77 +; NONEON-NOSVE-NEXT: tbnz w8, #27, .LBB3_61 +; NONEON-NOSVE-NEXT: .LBB3_30: // %else80 +; NONEON-NOSVE-NEXT: tbnz w8, #28, .LBB3_62 +; NONEON-NOSVE-NEXT: .LBB3_31: // %else83 +; NONEON-NOSVE-NEXT: tbnz w8, #29, .LBB3_63 +; NONEON-NOSVE-NEXT: .LBB3_32: // %else86 +; NONEON-NOSVE-NEXT: tbnz w8, #30, .LBB3_64 +; NONEON-NOSVE-NEXT: .LBB3_33: // %else89 +; NONEON-NOSVE-NEXT: tbz w8, #31, .LBB3_35 +; NONEON-NOSVE-NEXT: .LBB3_34: // %cond.load91 +; NONEON-NOSVE-NEXT: ldrb w8, [x0, #31] +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #48] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: str q1, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #32] +; NONEON-NOSVE-NEXT: .LBB3_35: // %else92 +; NONEON-NOSVE-NEXT: add sp, sp, #2064 +; NONEON-NOSVE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB3_36: // %cond.load4 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #2] +; NONEON-NOSVE-NEXT: str q0, 
[sp, #1872] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1904] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1887] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1904] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1903] +; NONEON-NOSVE-NEXT: str q0, [sp, #1856] +; NONEON-NOSVE-NEXT: ldur w10, [x9, #139] +; NONEON-NOSVE-NEXT: ldur x11, [x9, #131] +; NONEON-NOSVE-NEXT: stur w10, [x9, #155] +; NONEON-NOSVE-NEXT: stur x11, [x9, #147] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #1872] +; NONEON-NOSVE-NEXT: strh w10, [sp, #1888] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1856] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1890] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1888] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB3_6 +; NONEON-NOSVE-NEXT: .LBB3_37: // %cond.load7 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #3] +; NONEON-NOSVE-NEXT: str q0, [sp, #1808] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1840] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #1820] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1840] +; NONEON-NOSVE-NEXT: str w10, [sp, #1836] +; NONEON-NOSVE-NEXT: str q0, [sp, #1792] +; NONEON-NOSVE-NEXT: ldur x10, [x9, #68] +; NONEON-NOSVE-NEXT: stur x10, [x9, #84] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1810] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1826] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #1808] +; NONEON-NOSVE-NEXT: strh w10, [sp, #1824] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1792] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1827] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1824] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB3_7 +; NONEON-NOSVE-NEXT: .LBB3_38: // %cond.load10 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #4] +; NONEON-NOSVE-NEXT: str q0, [sp, #1744] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1776] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1759] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1776] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1775] +; NONEON-NOSVE-NEXT: str q0, [sp, #1728] +; NONEON-NOSVE-NEXT: ldurh w10, [x9, #13] +; NONEON-NOSVE-NEXT: ldur x11, [x9, #5] +; NONEON-NOSVE-NEXT: sturh w10, [x9, #29] +; NONEON-NOSVE-NEXT: stur x11, [x9, #21] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #1744] +; 
NONEON-NOSVE-NEXT: str w9, [sp, #1760] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1728] +; NONEON-NOSVE-NEXT: strb w9, [sp, #1764] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1760] +; NONEON-NOSVE-NEXT: add x9, sp, #1488 +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB3_8 +; NONEON-NOSVE-NEXT: .LBB3_39: // %cond.load13 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #5] +; NONEON-NOSVE-NEXT: str q0, [sp, #1680] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1712] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #1694] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1712] +; NONEON-NOSVE-NEXT: strh w10, [sp, #1710] +; NONEON-NOSVE-NEXT: str q0, [sp, #1664] +; NONEON-NOSVE-NEXT: ldur x10, [x9, #198] +; NONEON-NOSVE-NEXT: stur x10, [x9, #214] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1684] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1700] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #1680] +; NONEON-NOSVE-NEXT: str w10, [sp, #1696] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1664] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1701] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1696] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB3_9 +; NONEON-NOSVE-NEXT: .LBB3_40: // %cond.load16 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #6] +; NONEON-NOSVE-NEXT: str q0, [sp, #1616] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1648] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1631] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1648] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1647] +; NONEON-NOSVE-NEXT: str q0, [sp, #1600] +; NONEON-NOSVE-NEXT: ldur x10, [x9, #135] +; NONEON-NOSVE-NEXT: stur x10, [x9, #151] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #1620] +; NONEON-NOSVE-NEXT: strh w10, [sp, #1636] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #1616] +; NONEON-NOSVE-NEXT: str w10, [sp, #1632] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1600] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1638] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1632] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB3_10 +; NONEON-NOSVE-NEXT: .LBB3_41: // %cond.load19 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #7] +; NONEON-NOSVE-NEXT: str q0, [sp, #1552] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1584] +; 
NONEON-NOSVE-NEXT: ldr x10, [sp, #1560] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1584] +; NONEON-NOSVE-NEXT: str x10, [sp, #1576] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1558] +; NONEON-NOSVE-NEXT: str q0, [sp, #1536] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1574] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #1556] +; NONEON-NOSVE-NEXT: strh w10, [sp, #1572] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #1552] +; NONEON-NOSVE-NEXT: str w10, [sp, #1568] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1536] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1575] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1568] +; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB3_11 +; NONEON-NOSVE-NEXT: .LBB3_42: // %cond.load22 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #8] +; NONEON-NOSVE-NEXT: str q0, [sp, #1488] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1520] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1503] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1520] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1519] +; NONEON-NOSVE-NEXT: str q0, [sp, #1472] +; NONEON-NOSVE-NEXT: ldurh w10, [x9, #13] +; NONEON-NOSVE-NEXT: ldur w11, [x9, #9] +; NONEON-NOSVE-NEXT: sturh w10, [x9, #29] +; NONEON-NOSVE-NEXT: stur w11, [x9, #25] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #1488] +; NONEON-NOSVE-NEXT: str x9, [sp, #1504] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1472] +; NONEON-NOSVE-NEXT: strb w9, [sp, #1512] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1504] +; NONEON-NOSVE-NEXT: add x9, sp, #1232 +; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB3_12 +; NONEON-NOSVE-NEXT: .LBB3_43: // %cond.load25 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #9] +; NONEON-NOSVE-NEXT: str q0, [sp, #1424] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1456] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #1438] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1456] +; NONEON-NOSVE-NEXT: strh w10, [sp, #1454] +; NONEON-NOSVE-NEXT: str q0, [sp, #1408] +; NONEON-NOSVE-NEXT: ldur w10, [x9, #202] +; NONEON-NOSVE-NEXT: stur w10, [x9, #218] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1432] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1448] +; NONEON-NOSVE-NEXT: ldr x10, [sp, #1424] +; NONEON-NOSVE-NEXT: 
str x10, [sp, #1440] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1408] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1449] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1440] +; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB3_13 +; NONEON-NOSVE-NEXT: .LBB3_44: // %cond.load28 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #10] +; NONEON-NOSVE-NEXT: str q0, [sp, #1360] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1392] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1375] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1392] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1391] +; NONEON-NOSVE-NEXT: str q0, [sp, #1344] +; NONEON-NOSVE-NEXT: ldur w10, [x9, #139] +; NONEON-NOSVE-NEXT: stur w10, [x9, #155] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #1368] +; NONEON-NOSVE-NEXT: strh w10, [sp, #1384] +; NONEON-NOSVE-NEXT: ldr x10, [sp, #1360] +; NONEON-NOSVE-NEXT: str x10, [sp, #1376] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1344] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1386] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1376] +; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB3_14 +; NONEON-NOSVE-NEXT: .LBB3_45: // %cond.load31 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #11] +; NONEON-NOSVE-NEXT: str q0, [sp, #1296] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1328] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #1308] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1328] +; NONEON-NOSVE-NEXT: str w10, [sp, #1324] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1306] +; NONEON-NOSVE-NEXT: str q0, [sp, #1280] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1322] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #1304] +; NONEON-NOSVE-NEXT: strh w10, [sp, #1320] +; NONEON-NOSVE-NEXT: ldr x10, [sp, #1296] +; NONEON-NOSVE-NEXT: str x10, [sp, #1312] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1280] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1323] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1312] +; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB3_15 +; NONEON-NOSVE-NEXT: .LBB3_46: // %cond.load34 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #12] +; NONEON-NOSVE-NEXT: str q0, [sp, #1232] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1264] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1247] +; 
NONEON-NOSVE-NEXT: ldr q0, [sp, #1264] +; NONEON-NOSVE-NEXT: strb w10, [sp, #1263] +; NONEON-NOSVE-NEXT: str q0, [sp, #1216] +; NONEON-NOSVE-NEXT: ldurh w10, [x9, #13] +; NONEON-NOSVE-NEXT: sturh w10, [x9, #29] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #1240] +; NONEON-NOSVE-NEXT: str w9, [sp, #1256] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #1232] +; NONEON-NOSVE-NEXT: str x9, [sp, #1248] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1216] +; NONEON-NOSVE-NEXT: strb w9, [sp, #1260] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1248] +; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB3_16 +; NONEON-NOSVE-NEXT: .LBB3_47: // %cond.load37 +; NONEON-NOSVE-NEXT: ldrb w9, [x0, #13] +; NONEON-NOSVE-NEXT: str q0, [sp, #1168] +; NONEON-NOSVE-NEXT: strb w9, [sp, #1200] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #1182] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1200] +; NONEON-NOSVE-NEXT: strh w9, [sp, #1198] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1180] +; NONEON-NOSVE-NEXT: str q0, [sp, #1152] +; NONEON-NOSVE-NEXT: strb w9, [sp, #1196] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #1176] +; NONEON-NOSVE-NEXT: str w9, [sp, #1192] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #1168] +; NONEON-NOSVE-NEXT: str x9, [sp, #1184] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1152] +; NONEON-NOSVE-NEXT: strb w9, [sp, #1197] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1184] +; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB3_17 +; NONEON-NOSVE-NEXT: .LBB3_48: // %cond.load40 +; NONEON-NOSVE-NEXT: ldrb w9, [x0, #14] +; NONEON-NOSVE-NEXT: str q0, [sp, #1104] +; NONEON-NOSVE-NEXT: strb w9, [sp, #1136] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1119] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1136] +; NONEON-NOSVE-NEXT: strb w9, [sp, #1135] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #1116] +; NONEON-NOSVE-NEXT: str q0, [sp, #1088] +; NONEON-NOSVE-NEXT: strh w9, [sp, #1132] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #1112] +; NONEON-NOSVE-NEXT: str w9, [sp, #1128] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #1104] +; NONEON-NOSVE-NEXT: str x9, [sp, #1120] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1088] +; NONEON-NOSVE-NEXT: strb w9, [sp, 
#1134] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1120] +; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB3_18 +; NONEON-NOSVE-NEXT: .LBB3_49: // %cond.load43 +; NONEON-NOSVE-NEXT: ldrb w9, [x0, #15] +; NONEON-NOSVE-NEXT: str q0, [sp, #1024] +; NONEON-NOSVE-NEXT: strb w9, [sp, #1072] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1038] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1072] +; NONEON-NOSVE-NEXT: strb w9, [sp, #1070] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #1036] +; NONEON-NOSVE-NEXT: str q0, [sp, #1040] +; NONEON-NOSVE-NEXT: strh w9, [sp, #1068] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #1032] +; NONEON-NOSVE-NEXT: str w9, [sp, #1064] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #1024] +; NONEON-NOSVE-NEXT: str x9, [sp, #1056] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1040] +; NONEON-NOSVE-NEXT: strb w9, [sp, #1071] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #1056] +; NONEON-NOSVE-NEXT: tbz w8, #16, .LBB3_19 +; NONEON-NOSVE-NEXT: .LBB3_50: // %cond.load46 +; NONEON-NOSVE-NEXT: ldrb w9, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [sp, #976] +; NONEON-NOSVE-NEXT: add x10, sp, #976 +; NONEON-NOSVE-NEXT: strb w9, [sp, #1008] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #991] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #1008] +; NONEON-NOSVE-NEXT: strb w9, [sp, #1007] +; NONEON-NOSVE-NEXT: str q1, [sp, #960] +; NONEON-NOSVE-NEXT: ldurh w9, [x10, #13] +; NONEON-NOSVE-NEXT: ldur w11, [x10, #9] +; NONEON-NOSVE-NEXT: sturh w9, [x10, #29] +; NONEON-NOSVE-NEXT: ldur x9, [x10, #1] +; NONEON-NOSVE-NEXT: stur w11, [x10, #25] +; NONEON-NOSVE-NEXT: stur x9, [x10, #17] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #960] +; NONEON-NOSVE-NEXT: strb w9, [sp, #992] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #992] +; NONEON-NOSVE-NEXT: add x9, sp, #720 +; NONEON-NOSVE-NEXT: tbz w8, #17, .LBB3_20 +; NONEON-NOSVE-NEXT: .LBB3_51: // %cond.load49 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #17] +; NONEON-NOSVE-NEXT: str q1, [sp, #912] +; NONEON-NOSVE-NEXT: strb w10, [sp, #944] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #926] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #944] +; NONEON-NOSVE-NEXT: strh w10, 
[sp, #942] +; NONEON-NOSVE-NEXT: str q1, [sp, #896] +; NONEON-NOSVE-NEXT: ldur w10, [x9, #202] +; NONEON-NOSVE-NEXT: ldur x11, [x9, #194] +; NONEON-NOSVE-NEXT: stur w10, [x9, #218] +; NONEON-NOSVE-NEXT: stur x11, [x9, #210] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #912] +; NONEON-NOSVE-NEXT: strb w10, [sp, #928] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #896] +; NONEON-NOSVE-NEXT: strb w10, [sp, #929] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #928] +; NONEON-NOSVE-NEXT: tbz w8, #18, .LBB3_21 +; NONEON-NOSVE-NEXT: .LBB3_52: // %cond.load52 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #18] +; NONEON-NOSVE-NEXT: str q1, [sp, #848] +; NONEON-NOSVE-NEXT: strb w10, [sp, #880] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #863] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #880] +; NONEON-NOSVE-NEXT: strb w10, [sp, #879] +; NONEON-NOSVE-NEXT: str q1, [sp, #832] +; NONEON-NOSVE-NEXT: ldur w10, [x9, #139] +; NONEON-NOSVE-NEXT: ldur x11, [x9, #131] +; NONEON-NOSVE-NEXT: stur w10, [x9, #155] +; NONEON-NOSVE-NEXT: stur x11, [x9, #147] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #848] +; NONEON-NOSVE-NEXT: strh w10, [sp, #864] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #832] +; NONEON-NOSVE-NEXT: strb w10, [sp, #866] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #864] +; NONEON-NOSVE-NEXT: tbz w8, #19, .LBB3_22 +; NONEON-NOSVE-NEXT: .LBB3_53: // %cond.load55 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #19] +; NONEON-NOSVE-NEXT: str q1, [sp, #784] +; NONEON-NOSVE-NEXT: strb w10, [sp, #816] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #796] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #816] +; NONEON-NOSVE-NEXT: str w10, [sp, #812] +; NONEON-NOSVE-NEXT: str q1, [sp, #768] +; NONEON-NOSVE-NEXT: ldur x10, [x9, #68] +; NONEON-NOSVE-NEXT: stur x10, [x9, #84] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #786] +; NONEON-NOSVE-NEXT: strb w10, [sp, #802] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #784] +; NONEON-NOSVE-NEXT: strh w10, [sp, #800] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #768] +; NONEON-NOSVE-NEXT: strb w10, [sp, #803] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #800] +; NONEON-NOSVE-NEXT: 
tbz w8, #20, .LBB3_23 +; NONEON-NOSVE-NEXT: .LBB3_54: // %cond.load58 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #20] +; NONEON-NOSVE-NEXT: str q1, [sp, #720] +; NONEON-NOSVE-NEXT: strb w10, [sp, #752] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #735] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #752] +; NONEON-NOSVE-NEXT: strb w10, [sp, #751] +; NONEON-NOSVE-NEXT: str q1, [sp, #704] +; NONEON-NOSVE-NEXT: ldurh w10, [x9, #13] +; NONEON-NOSVE-NEXT: ldur x11, [x9, #5] +; NONEON-NOSVE-NEXT: sturh w10, [x9, #29] +; NONEON-NOSVE-NEXT: stur x11, [x9, #21] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #720] +; NONEON-NOSVE-NEXT: str w9, [sp, #736] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #704] +; NONEON-NOSVE-NEXT: strb w9, [sp, #740] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #736] +; NONEON-NOSVE-NEXT: add x9, sp, #464 +; NONEON-NOSVE-NEXT: tbz w8, #21, .LBB3_24 +; NONEON-NOSVE-NEXT: .LBB3_55: // %cond.load61 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #21] +; NONEON-NOSVE-NEXT: str q1, [sp, #656] +; NONEON-NOSVE-NEXT: strb w10, [sp, #688] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #670] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #688] +; NONEON-NOSVE-NEXT: strh w10, [sp, #686] +; NONEON-NOSVE-NEXT: str q1, [sp, #640] +; NONEON-NOSVE-NEXT: ldur x10, [x9, #198] +; NONEON-NOSVE-NEXT: stur x10, [x9, #214] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #660] +; NONEON-NOSVE-NEXT: strb w10, [sp, #676] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #656] +; NONEON-NOSVE-NEXT: str w10, [sp, #672] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #640] +; NONEON-NOSVE-NEXT: strb w10, [sp, #677] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #672] +; NONEON-NOSVE-NEXT: tbz w8, #22, .LBB3_25 +; NONEON-NOSVE-NEXT: .LBB3_56: // %cond.load64 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #22] +; NONEON-NOSVE-NEXT: str q1, [sp, #592] +; NONEON-NOSVE-NEXT: strb w10, [sp, #624] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #607] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #624] +; NONEON-NOSVE-NEXT: strb w10, [sp, #623] +; NONEON-NOSVE-NEXT: str q1, [sp, #576] +; NONEON-NOSVE-NEXT: ldur x10, [x9, #135] +; NONEON-NOSVE-NEXT: 
stur x10, [x9, #151] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #596] +; NONEON-NOSVE-NEXT: strh w10, [sp, #612] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #592] +; NONEON-NOSVE-NEXT: str w10, [sp, #608] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #576] +; NONEON-NOSVE-NEXT: strb w10, [sp, #614] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #608] +; NONEON-NOSVE-NEXT: tbz w8, #23, .LBB3_26 +; NONEON-NOSVE-NEXT: .LBB3_57: // %cond.load67 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #23] +; NONEON-NOSVE-NEXT: str q1, [sp, #528] +; NONEON-NOSVE-NEXT: strb w10, [sp, #560] +; NONEON-NOSVE-NEXT: ldr x10, [sp, #536] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #560] +; NONEON-NOSVE-NEXT: str x10, [sp, #552] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #534] +; NONEON-NOSVE-NEXT: str q1, [sp, #512] +; NONEON-NOSVE-NEXT: strb w10, [sp, #550] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #532] +; NONEON-NOSVE-NEXT: strh w10, [sp, #548] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #528] +; NONEON-NOSVE-NEXT: str w10, [sp, #544] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #512] +; NONEON-NOSVE-NEXT: strb w10, [sp, #551] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #544] +; NONEON-NOSVE-NEXT: tbz w8, #24, .LBB3_27 +; NONEON-NOSVE-NEXT: .LBB3_58: // %cond.load70 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #24] +; NONEON-NOSVE-NEXT: str q1, [sp, #464] +; NONEON-NOSVE-NEXT: strb w10, [sp, #496] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #479] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #496] +; NONEON-NOSVE-NEXT: strb w10, [sp, #495] +; NONEON-NOSVE-NEXT: str q1, [sp, #448] +; NONEON-NOSVE-NEXT: ldurh w10, [x9, #13] +; NONEON-NOSVE-NEXT: ldur w11, [x9, #9] +; NONEON-NOSVE-NEXT: sturh w10, [x9, #29] +; NONEON-NOSVE-NEXT: stur w11, [x9, #25] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #464] +; NONEON-NOSVE-NEXT: str x9, [sp, #480] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #448] +; NONEON-NOSVE-NEXT: strb w9, [sp, #488] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #480] +; NONEON-NOSVE-NEXT: add x9, sp, #208 +; NONEON-NOSVE-NEXT: tbz w8, #25, .LBB3_28 +; NONEON-NOSVE-NEXT: .LBB3_59: // %cond.load73 +; NONEON-NOSVE-NEXT: 
ldrb w10, [x0, #25] +; NONEON-NOSVE-NEXT: str q1, [sp, #400] +; NONEON-NOSVE-NEXT: strb w10, [sp, #432] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #414] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #432] +; NONEON-NOSVE-NEXT: strh w10, [sp, #430] +; NONEON-NOSVE-NEXT: str q1, [sp, #384] +; NONEON-NOSVE-NEXT: ldur w10, [x9, #202] +; NONEON-NOSVE-NEXT: stur w10, [x9, #218] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #408] +; NONEON-NOSVE-NEXT: strb w10, [sp, #424] +; NONEON-NOSVE-NEXT: ldr x10, [sp, #400] +; NONEON-NOSVE-NEXT: str x10, [sp, #416] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #384] +; NONEON-NOSVE-NEXT: strb w10, [sp, #425] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #416] +; NONEON-NOSVE-NEXT: tbz w8, #26, .LBB3_29 +; NONEON-NOSVE-NEXT: .LBB3_60: // %cond.load76 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #26] +; NONEON-NOSVE-NEXT: str q1, [sp, #336] +; NONEON-NOSVE-NEXT: strb w10, [sp, #368] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #351] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #368] +; NONEON-NOSVE-NEXT: strb w10, [sp, #367] +; NONEON-NOSVE-NEXT: str q1, [sp, #320] +; NONEON-NOSVE-NEXT: ldur w10, [x9, #139] +; NONEON-NOSVE-NEXT: stur w10, [x9, #155] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #344] +; NONEON-NOSVE-NEXT: strh w10, [sp, #360] +; NONEON-NOSVE-NEXT: ldr x10, [sp, #336] +; NONEON-NOSVE-NEXT: str x10, [sp, #352] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #320] +; NONEON-NOSVE-NEXT: strb w10, [sp, #362] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #352] +; NONEON-NOSVE-NEXT: tbz w8, #27, .LBB3_30 +; NONEON-NOSVE-NEXT: .LBB3_61: // %cond.load79 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #27] +; NONEON-NOSVE-NEXT: str q1, [sp, #272] +; NONEON-NOSVE-NEXT: strb w10, [sp, #304] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #284] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #304] +; NONEON-NOSVE-NEXT: str w10, [sp, #300] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #282] +; NONEON-NOSVE-NEXT: str q1, [sp, #256] +; NONEON-NOSVE-NEXT: strb w10, [sp, #298] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #280] +; NONEON-NOSVE-NEXT: strh w10, [sp, #296] +; 
NONEON-NOSVE-NEXT: ldr x10, [sp, #272] +; NONEON-NOSVE-NEXT: str x10, [sp, #288] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #256] +; NONEON-NOSVE-NEXT: strb w10, [sp, #299] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #288] +; NONEON-NOSVE-NEXT: tbz w8, #28, .LBB3_31 +; NONEON-NOSVE-NEXT: .LBB3_62: // %cond.load82 +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #28] +; NONEON-NOSVE-NEXT: str q1, [sp, #208] +; NONEON-NOSVE-NEXT: strb w10, [sp, #240] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #223] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #240] +; NONEON-NOSVE-NEXT: strb w10, [sp, #239] +; NONEON-NOSVE-NEXT: str q1, [sp, #192] +; NONEON-NOSVE-NEXT: ldurh w10, [x9, #13] +; NONEON-NOSVE-NEXT: sturh w10, [x9, #29] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #216] +; NONEON-NOSVE-NEXT: str w9, [sp, #232] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #208] +; NONEON-NOSVE-NEXT: str x9, [sp, #224] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #192] +; NONEON-NOSVE-NEXT: strb w9, [sp, #236] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #224] +; NONEON-NOSVE-NEXT: tbz w8, #29, .LBB3_32 +; NONEON-NOSVE-NEXT: .LBB3_63: // %cond.load85 +; NONEON-NOSVE-NEXT: ldrb w9, [x0, #29] +; NONEON-NOSVE-NEXT: str q1, [sp, #144] +; NONEON-NOSVE-NEXT: strb w9, [sp, #176] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #158] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #176] +; NONEON-NOSVE-NEXT: strh w9, [sp, #174] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #156] +; NONEON-NOSVE-NEXT: str q1, [sp, #128] +; NONEON-NOSVE-NEXT: strb w9, [sp, #172] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #152] +; NONEON-NOSVE-NEXT: str w9, [sp, #168] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #144] +; NONEON-NOSVE-NEXT: str x9, [sp, #160] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #128] +; NONEON-NOSVE-NEXT: strb w9, [sp, #173] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #160] +; NONEON-NOSVE-NEXT: tbz w8, #30, .LBB3_33 +; NONEON-NOSVE-NEXT: .LBB3_64: // %cond.load88 +; NONEON-NOSVE-NEXT: ldrb w9, [x0, #30] +; NONEON-NOSVE-NEXT: str q1, [sp, #80] +; NONEON-NOSVE-NEXT: strb w9, [sp, #112] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #95] +; 
NONEON-NOSVE-NEXT: ldr q1, [sp, #112] +; NONEON-NOSVE-NEXT: strb w9, [sp, #111] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #92] +; NONEON-NOSVE-NEXT: str q1, [sp, #64] +; NONEON-NOSVE-NEXT: strh w9, [sp, #108] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #88] +; NONEON-NOSVE-NEXT: str w9, [sp, #104] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #80] +; NONEON-NOSVE-NEXT: str x9, [sp, #96] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #64] +; NONEON-NOSVE-NEXT: strb w9, [sp, #110] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #96] +; NONEON-NOSVE-NEXT: tbnz w8, #31, .LBB3_34 +; NONEON-NOSVE-NEXT: b .LBB3_35 %load = call <32 x i8> @llvm.masked.load.v32i8(ptr %src, i32 8, <32 x i1> %mask, <32 x i8> zeroinitializer) ret <32 x i8> %load } @@ -155,6 +1639,40 @@ define <2 x half> @masked_load_v2f16(ptr %src, <2 x i1> %mask) { ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: str d0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: bfxil w8, w9, #0, #1 +; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB4_2 +; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: ldr h1, [x0] +; NONEON-NOSVE-NEXT: str h1, [sp, #24] +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB4_3 +; NONEON-NOSVE-NEXT: b .LBB4_4 +; NONEON-NOSVE-NEXT: .LBB4_2: +; NONEON-NOSVE-NEXT: adrp x9, .LCPI4_0 +; NONEON-NOSVE-NEXT: ldr d0, [x9, :lo12:.LCPI4_0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB4_4 +; NONEON-NOSVE-NEXT: .LBB4_3: // %cond.load1 +; NONEON-NOSVE-NEXT: ldr h1, [x0, #2] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] +; NONEON-NOSVE-NEXT: str h1, [sp, 
#18] +; NONEON-NOSVE-NEXT: str h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: .LBB4_4: // %else2 +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %load = call <2 x half> @llvm.masked.load.v2f16(ptr %src, i32 8, <2 x i1> %mask, <2 x half> zeroinitializer) ret <2 x half> %load } @@ -170,6 +1688,88 @@ define <4 x half> @masked_load_v4f16(ptr %src, <4 x i1> %mask) { ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #128 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: str d0, [sp, #112] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #114] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #116] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #118] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #112] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: and w9, w9, #0x4 +; NONEON-NOSVE-NEXT: and w10, w10, #0x8 +; NONEON-NOSVE-NEXT: bfxil w8, w11, #0, #1 +; NONEON-NOSVE-NEXT: orr w9, w9, w10 +; NONEON-NOSVE-NEXT: orr w8, w8, w9 +; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB5_2 +; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: ldr h1, [x0] +; NONEON-NOSVE-NEXT: stur wzr, [sp, #106] +; NONEON-NOSVE-NEXT: str h1, [sp, #104] +; NONEON-NOSVE-NEXT: str h0, [sp, #110] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #104] +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB5_3 +; NONEON-NOSVE-NEXT: b .LBB5_4 +; NONEON-NOSVE-NEXT: .LBB5_2: +; NONEON-NOSVE-NEXT: adrp x9, .LCPI5_0 +; NONEON-NOSVE-NEXT: ldr d0, [x9, :lo12:.LCPI5_0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB5_4 +; NONEON-NOSVE-NEXT: .LBB5_3: // %cond.load1 +; NONEON-NOSVE-NEXT: ldr h1, [x0, #2] +; NONEON-NOSVE-NEXT: str d0, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #84] +; 
NONEON-NOSVE-NEXT: str h1, [sp, #96] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #96] +; NONEON-NOSVE-NEXT: str w9, [sp, #92] +; NONEON-NOSVE-NEXT: str d0, [sp, #72] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #80] +; NONEON-NOSVE-NEXT: str h0, [sp, #88] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #72] +; NONEON-NOSVE-NEXT: str h0, [sp, #90] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #88] +; NONEON-NOSVE-NEXT: .LBB5_4: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB5_7 +; NONEON-NOSVE-NEXT: // %bb.5: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB5_8 +; NONEON-NOSVE-NEXT: .LBB5_6: // %else8 +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB5_7: // %cond.load4 +; NONEON-NOSVE-NEXT: ldr h1, [x0, #4] +; NONEON-NOSVE-NEXT: str d0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #48] +; NONEON-NOSVE-NEXT: str h1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #64] +; NONEON-NOSVE-NEXT: str w9, [sp, #56] +; NONEON-NOSVE-NEXT: str d0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: str h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #40] +; NONEON-NOSVE-NEXT: str h0, [sp, #60] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #56] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB5_6 +; NONEON-NOSVE-NEXT: .LBB5_8: // %cond.load7 +; NONEON-NOSVE-NEXT: ldr h1, [x0, #6] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: str h1, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #32] +; NONEON-NOSVE-NEXT: str w8, [sp, #24] +; NONEON-NOSVE-NEXT: str d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %load = call <4 x half> @llvm.masked.load.v4f16(ptr %src, i32 8, <4 x i1> %mask, <4 x half> zeroinitializer) ret <4 x half> %load } @@ -186,6 +1786,187 @@ define <8 x half> @masked_load_v8f16(ptr %src, <8 x i1> 
%mask) { ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #496 +; NONEON-NOSVE-NEXT: str x29, [sp, #480] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 496 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: str d0, [sp, #464] +; NONEON-NOSVE-NEXT: add x9, sp, #208 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #466] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #467] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #465] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #468] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #469] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #470] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w11, w11, #0, #1 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #464] +; NONEON-NOSVE-NEXT: sbfx w13, w13, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w14, w14, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x4 +; NONEON-NOSVE-NEXT: and w10, w10, #0x8 +; NONEON-NOSVE-NEXT: sbfx w15, w15, #0, #1 +; NONEON-NOSVE-NEXT: orr w8, w8, w10 +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #471] +; NONEON-NOSVE-NEXT: and w11, w11, #0x2 +; NONEON-NOSVE-NEXT: and w13, w13, #0x10 +; NONEON-NOSVE-NEXT: bfxil w11, w12, #0, #1 +; NONEON-NOSVE-NEXT: and w12, w14, #0x20 +; NONEON-NOSVE-NEXT: orr w8, w8, w13 +; NONEON-NOSVE-NEXT: and w13, w15, #0x40 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: orr w8, w11, w8 +; NONEON-NOSVE-NEXT: orr w11, w12, w13 +; NONEON-NOSVE-NEXT: orr w8, w8, w11 +; NONEON-NOSVE-NEXT: and w10, w10, #0x80 +; NONEON-NOSVE-NEXT: add w10, w8, w10 +; NONEON-NOSVE-NEXT: and w8, w10, #0xff +; NONEON-NOSVE-NEXT: tbz w10, #0, .LBB6_2 +; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: ldr h1, [x0] +; NONEON-NOSVE-NEXT: stur wzr, [x9, #250] +; NONEON-NOSVE-NEXT: stur xzr, [x9, #242] +; NONEON-NOSVE-NEXT: str h1, [sp, 
#448] +; NONEON-NOSVE-NEXT: str h0, [sp, #462] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #448] +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB6_3 +; NONEON-NOSVE-NEXT: b .LBB6_4 +; NONEON-NOSVE-NEXT: .LBB6_2: +; NONEON-NOSVE-NEXT: adrp x10, .LCPI6_0 +; NONEON-NOSVE-NEXT: ldr q0, [x10, :lo12:.LCPI6_0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB6_4 +; NONEON-NOSVE-NEXT: .LBB6_3: // %cond.load1 +; NONEON-NOSVE-NEXT: ldr h1, [x0, #2] +; NONEON-NOSVE-NEXT: str q0, [sp, #400] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #412] +; NONEON-NOSVE-NEXT: str h1, [sp, #432] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #432] +; NONEON-NOSVE-NEXT: str w10, [sp, #428] +; NONEON-NOSVE-NEXT: str q0, [sp, #384] +; NONEON-NOSVE-NEXT: ldur x10, [x9, #196] +; NONEON-NOSVE-NEXT: stur x10, [x9, #212] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #400] +; NONEON-NOSVE-NEXT: str h0, [sp, #416] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #384] +; NONEON-NOSVE-NEXT: str h0, [sp, #418] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #416] +; NONEON-NOSVE-NEXT: .LBB6_4: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB6_12 +; NONEON-NOSVE-NEXT: // %bb.5: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB6_13 +; NONEON-NOSVE-NEXT: .LBB6_6: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB6_14 +; NONEON-NOSVE-NEXT: .LBB6_7: // %else11 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB6_15 +; NONEON-NOSVE-NEXT: .LBB6_8: // %else14 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB6_16 +; NONEON-NOSVE-NEXT: .LBB6_9: // %else17 +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB6_11 +; NONEON-NOSVE-NEXT: .LBB6_10: // %cond.load19 +; NONEON-NOSVE-NEXT: ldr h1, [x0, #14] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: str h1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; 
NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: .LBB6_11: // %else20 +; NONEON-NOSVE-NEXT: ldr x29, [sp, #480] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: add sp, sp, #496 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB6_12: // %cond.load4 +; NONEON-NOSVE-NEXT: ldr h1, [x0, #4] +; NONEON-NOSVE-NEXT: str q0, [sp, #336] +; NONEON-NOSVE-NEXT: str h1, [sp, #368] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #368] +; NONEON-NOSVE-NEXT: str q0, [sp, #320] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #350] +; NONEON-NOSVE-NEXT: str h0, [sp, #366] +; NONEON-NOSVE-NEXT: ldur x10, [x9, #134] +; NONEON-NOSVE-NEXT: stur x10, [x9, #150] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #336] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #320] +; NONEON-NOSVE-NEXT: str w10, [sp, #352] +; NONEON-NOSVE-NEXT: str h0, [sp, #356] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #352] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB6_6 +; NONEON-NOSVE-NEXT: .LBB6_13: // %cond.load7 +; NONEON-NOSVE-NEXT: ldr h1, [x0, #6] +; NONEON-NOSVE-NEXT: str q0, [sp, #272] +; NONEON-NOSVE-NEXT: ldr x10, [sp, #280] +; NONEON-NOSVE-NEXT: str h1, [sp, #304] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #304] +; NONEON-NOSVE-NEXT: str x10, [sp, #296] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #272] +; NONEON-NOSVE-NEXT: str q0, [sp, #256] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #276] +; NONEON-NOSVE-NEXT: str w10, [sp, #288] +; NONEON-NOSVE-NEXT: str h0, [sp, #292] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #256] +; NONEON-NOSVE-NEXT: str h0, [sp, #294] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #288] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB6_7 +; NONEON-NOSVE-NEXT: .LBB6_14: // %cond.load10 +; NONEON-NOSVE-NEXT: ldr h1, [x0, #8] +; NONEON-NOSVE-NEXT: str q0, [sp, #208] +; NONEON-NOSVE-NEXT: str h1, [sp, #240] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #240] +; NONEON-NOSVE-NEXT: str q0, [sp, #192] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #222] +; NONEON-NOSVE-NEXT: str h0, [sp, #238] +; NONEON-NOSVE-NEXT: ldur w10, [x9, #10] +; NONEON-NOSVE-NEXT: stur w10, [x9, #26] 
+; NONEON-NOSVE-NEXT: ldr x9, [sp, #208] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #192] +; NONEON-NOSVE-NEXT: str x9, [sp, #224] +; NONEON-NOSVE-NEXT: str h0, [sp, #232] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #224] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB6_8 +; NONEON-NOSVE-NEXT: .LBB6_15: // %cond.load13 +; NONEON-NOSVE-NEXT: ldr h1, [x0, #10] +; NONEON-NOSVE-NEXT: str q0, [sp, #144] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #156] +; NONEON-NOSVE-NEXT: str h1, [sp, #176] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #176] +; NONEON-NOSVE-NEXT: str w9, [sp, #172] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #144] +; NONEON-NOSVE-NEXT: str q0, [sp, #128] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #152] +; NONEON-NOSVE-NEXT: str x9, [sp, #160] +; NONEON-NOSVE-NEXT: str h0, [sp, #168] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #128] +; NONEON-NOSVE-NEXT: str h0, [sp, #170] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #160] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB6_9 +; NONEON-NOSVE-NEXT: .LBB6_16: // %cond.load16 +; NONEON-NOSVE-NEXT: ldr h1, [x0, #12] +; NONEON-NOSVE-NEXT: str q0, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #88] +; NONEON-NOSVE-NEXT: str h1, [sp, #112] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #112] +; NONEON-NOSVE-NEXT: str w9, [sp, #104] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #80] +; NONEON-NOSVE-NEXT: str q0, [sp, #64] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #94] +; NONEON-NOSVE-NEXT: str x9, [sp, #96] +; NONEON-NOSVE-NEXT: str h0, [sp, #110] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #64] +; NONEON-NOSVE-NEXT: str h0, [sp, #108] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #96] +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB6_10 +; NONEON-NOSVE-NEXT: b .LBB6_11 %load = call <8 x half> @llvm.masked.load.v8f16(ptr %src, i32 8, <8 x i1> %mask, <8 x half> zeroinitializer) ret <8 x half> %load } @@ -210,6 +1991,386 @@ define <16 x half> @masked_load_v16f16(ptr %src, <16 x i1> %mask) { ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1] ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v16f16: +; 
NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: sub sp, sp, #1024 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 1040 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: str q0, [sp, #976] +; NONEON-NOSVE-NEXT: adrp x9, .LCPI7_0 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #984] +; NONEON-NOSVE-NEXT: and w8, w8, #0x1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #1000] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #976] +; NONEON-NOSVE-NEXT: and w8, w8, #0x1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #992] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #991] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x80 +; NONEON-NOSVE-NEXT: strb w8, [sp, #1007] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #990] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x40 +; NONEON-NOSVE-NEXT: strb w8, [sp, #1006] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #989] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x20 +; NONEON-NOSVE-NEXT: strb w8, [sp, #1005] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #988] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x10 +; NONEON-NOSVE-NEXT: strb w8, [sp, #1004] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #987] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #1003] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #986] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x4 +; NONEON-NOSVE-NEXT: strb w8, [sp, #1002] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #985] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: strb w8, [sp, #1001] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #983] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x80 +; NONEON-NOSVE-NEXT: strb w8, [sp, #999] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #982] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, 
#0x40 +; NONEON-NOSVE-NEXT: strb w8, [sp, #998] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #981] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x20 +; NONEON-NOSVE-NEXT: strb w8, [sp, #997] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #980] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x10 +; NONEON-NOSVE-NEXT: strb w8, [sp, #996] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #979] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #995] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #978] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x4 +; NONEON-NOSVE-NEXT: strb w8, [sp, #994] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #977] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: strb w8, [sp, #993] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #992] +; NONEON-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI7_0] +; NONEON-NOSVE-NEXT: add x9, sp, #720 +; NONEON-NOSVE-NEXT: str q0, [sp, #1008] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #1010] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #1008] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #1012] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #1014] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #1016] +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #1018] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #1020] +; NONEON-NOSVE-NEXT: add w8, w10, w8 +; NONEON-NOSVE-NEXT: add w10, w11, w12 +; NONEON-NOSVE-NEXT: add w11, w13, w14 +; NONEON-NOSVE-NEXT: add w8, w8, w10 +; NONEON-NOSVE-NEXT: add w10, w11, w15 +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #1022] +; NONEON-NOSVE-NEXT: add w8, w8, w10 +; NONEON-NOSVE-NEXT: add w8, w8, w11 +; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB7_2 +; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: ldr h2, [x0] +; NONEON-NOSVE-NEXT: stur wzr, [x9, #250] +; NONEON-NOSVE-NEXT: 
stur xzr, [x9, #242] +; NONEON-NOSVE-NEXT: str h2, [sp, #960] +; NONEON-NOSVE-NEXT: str h0, [sp, #974] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #960] +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB7_3 +; NONEON-NOSVE-NEXT: b .LBB7_4 +; NONEON-NOSVE-NEXT: .LBB7_2: +; NONEON-NOSVE-NEXT: mov v0.16b, v1.16b +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB7_4 +; NONEON-NOSVE-NEXT: .LBB7_3: // %cond.load1 +; NONEON-NOSVE-NEXT: ldr h2, [x0, #2] +; NONEON-NOSVE-NEXT: str q0, [sp, #912] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #924] +; NONEON-NOSVE-NEXT: str h2, [sp, #944] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #944] +; NONEON-NOSVE-NEXT: str w10, [sp, #940] +; NONEON-NOSVE-NEXT: str q0, [sp, #896] +; NONEON-NOSVE-NEXT: ldur x10, [x9, #196] +; NONEON-NOSVE-NEXT: stur x10, [x9, #212] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #912] +; NONEON-NOSVE-NEXT: str h0, [sp, #928] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #896] +; NONEON-NOSVE-NEXT: str h0, [sp, #930] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #928] +; NONEON-NOSVE-NEXT: .LBB7_4: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB7_20 +; NONEON-NOSVE-NEXT: // %bb.5: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB7_21 +; NONEON-NOSVE-NEXT: .LBB7_6: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB7_22 +; NONEON-NOSVE-NEXT: .LBB7_7: // %else11 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB7_23 +; NONEON-NOSVE-NEXT: .LBB7_8: // %else14 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB7_24 +; NONEON-NOSVE-NEXT: .LBB7_9: // %else17 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB7_25 +; NONEON-NOSVE-NEXT: .LBB7_10: // %else20 +; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB7_26 +; NONEON-NOSVE-NEXT: .LBB7_11: // %else23 +; NONEON-NOSVE-NEXT: add x9, sp, #208 +; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB7_27 +; NONEON-NOSVE-NEXT: .LBB7_12: // %else26 +; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB7_28 +; NONEON-NOSVE-NEXT: .LBB7_13: // %else29 +; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB7_29 +; NONEON-NOSVE-NEXT: .LBB7_14: // %else32 +; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB7_30 +; NONEON-NOSVE-NEXT: .LBB7_15: // %else35 +; 
NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB7_31 +; NONEON-NOSVE-NEXT: .LBB7_16: // %else38 +; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB7_32 +; NONEON-NOSVE-NEXT: .LBB7_17: // %else41 +; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB7_19 +; NONEON-NOSVE-NEXT: .LBB7_18: // %cond.load43 +; NONEON-NOSVE-NEXT: ldr h2, [x0, #30] +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: str h2, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #48] +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: str q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: str h1, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #16] +; NONEON-NOSVE-NEXT: str h1, [sp, #46] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #32] +; NONEON-NOSVE-NEXT: .LBB7_19: // %else44 +; NONEON-NOSVE-NEXT: add sp, sp, #1024 +; NONEON-NOSVE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB7_20: // %cond.load4 +; NONEON-NOSVE-NEXT: ldr h2, [x0, #4] +; NONEON-NOSVE-NEXT: str q0, [sp, #848] +; NONEON-NOSVE-NEXT: str h2, [sp, #880] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #880] +; NONEON-NOSVE-NEXT: str q0, [sp, #832] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #862] +; NONEON-NOSVE-NEXT: str h0, [sp, #878] +; NONEON-NOSVE-NEXT: ldur x10, [x9, #134] +; NONEON-NOSVE-NEXT: stur x10, [x9, #150] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #848] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #832] +; NONEON-NOSVE-NEXT: str w10, [sp, #864] +; NONEON-NOSVE-NEXT: str h0, [sp, #868] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #864] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB7_6 +; NONEON-NOSVE-NEXT: .LBB7_21: // %cond.load7 +; NONEON-NOSVE-NEXT: ldr h2, [x0, #6] +; NONEON-NOSVE-NEXT: str q0, [sp, #784] +; NONEON-NOSVE-NEXT: ldr x10, [sp, #792] +; NONEON-NOSVE-NEXT: str h2, [sp, #816] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #816] +; NONEON-NOSVE-NEXT: str x10, [sp, #808] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #784] +; 
NONEON-NOSVE-NEXT: str q0, [sp, #768] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #788] +; NONEON-NOSVE-NEXT: str w10, [sp, #800] +; NONEON-NOSVE-NEXT: str h0, [sp, #804] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #768] +; NONEON-NOSVE-NEXT: str h0, [sp, #806] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #800] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB7_7 +; NONEON-NOSVE-NEXT: .LBB7_22: // %cond.load10 +; NONEON-NOSVE-NEXT: ldr h2, [x0, #8] +; NONEON-NOSVE-NEXT: str q0, [sp, #720] +; NONEON-NOSVE-NEXT: str h2, [sp, #752] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #752] +; NONEON-NOSVE-NEXT: str q0, [sp, #704] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #734] +; NONEON-NOSVE-NEXT: str h0, [sp, #750] +; NONEON-NOSVE-NEXT: ldur w10, [x9, #10] +; NONEON-NOSVE-NEXT: stur w10, [x9, #26] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #720] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #704] +; NONEON-NOSVE-NEXT: str x9, [sp, #736] +; NONEON-NOSVE-NEXT: str h0, [sp, #744] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #736] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB7_8 +; NONEON-NOSVE-NEXT: .LBB7_23: // %cond.load13 +; NONEON-NOSVE-NEXT: ldr h2, [x0, #10] +; NONEON-NOSVE-NEXT: str q0, [sp, #656] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #668] +; NONEON-NOSVE-NEXT: str h2, [sp, #688] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #688] +; NONEON-NOSVE-NEXT: str w9, [sp, #684] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #656] +; NONEON-NOSVE-NEXT: str q0, [sp, #640] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #664] +; NONEON-NOSVE-NEXT: str x9, [sp, #672] +; NONEON-NOSVE-NEXT: str h0, [sp, #680] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #640] +; NONEON-NOSVE-NEXT: str h0, [sp, #682] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #672] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB7_9 +; NONEON-NOSVE-NEXT: .LBB7_24: // %cond.load16 +; NONEON-NOSVE-NEXT: ldr h2, [x0, #12] +; NONEON-NOSVE-NEXT: str q0, [sp, #592] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #600] +; NONEON-NOSVE-NEXT: str h2, [sp, #624] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #624] +; NONEON-NOSVE-NEXT: str w9, [sp, #616] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #592] +; 
NONEON-NOSVE-NEXT: str q0, [sp, #576] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #606] +; NONEON-NOSVE-NEXT: str x9, [sp, #608] +; NONEON-NOSVE-NEXT: str h0, [sp, #622] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #576] +; NONEON-NOSVE-NEXT: str h0, [sp, #620] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #608] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB7_10 +; NONEON-NOSVE-NEXT: .LBB7_25: // %cond.load19 +; NONEON-NOSVE-NEXT: ldr h2, [x0, #14] +; NONEON-NOSVE-NEXT: str q0, [sp, #512] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #520] +; NONEON-NOSVE-NEXT: str h2, [sp, #560] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #560] +; NONEON-NOSVE-NEXT: str w9, [sp, #552] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #512] +; NONEON-NOSVE-NEXT: str q0, [sp, #528] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #524] +; NONEON-NOSVE-NEXT: str x9, [sp, #544] +; NONEON-NOSVE-NEXT: str h0, [sp, #556] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #528] +; NONEON-NOSVE-NEXT: str h0, [sp, #558] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #544] +; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB7_11 +; NONEON-NOSVE-NEXT: .LBB7_26: // %cond.load22 +; NONEON-NOSVE-NEXT: ldr h2, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [sp, #464] +; NONEON-NOSVE-NEXT: add x9, sp, #464 +; NONEON-NOSVE-NEXT: str h2, [sp, #496] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #496] +; NONEON-NOSVE-NEXT: str q1, [sp, #448] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #478] +; NONEON-NOSVE-NEXT: str h1, [sp, #494] +; NONEON-NOSVE-NEXT: ldur w10, [x9, #10] +; NONEON-NOSVE-NEXT: ldur x11, [x9, #2] +; NONEON-NOSVE-NEXT: stur w10, [x9, #26] +; NONEON-NOSVE-NEXT: stur x11, [x9, #18] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #448] +; NONEON-NOSVE-NEXT: str h1, [sp, #480] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #480] +; NONEON-NOSVE-NEXT: add x9, sp, #208 +; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB7_12 +; NONEON-NOSVE-NEXT: .LBB7_27: // %cond.load25 +; NONEON-NOSVE-NEXT: ldr h2, [x0, #18] +; NONEON-NOSVE-NEXT: str q1, [sp, #400] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #412] +; NONEON-NOSVE-NEXT: str h2, [sp, #432] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #432] +; 
NONEON-NOSVE-NEXT: str w10, [sp, #428] +; NONEON-NOSVE-NEXT: str q1, [sp, #384] +; NONEON-NOSVE-NEXT: ldur x10, [x9, #196] +; NONEON-NOSVE-NEXT: stur x10, [x9, #212] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #400] +; NONEON-NOSVE-NEXT: str h1, [sp, #416] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #384] +; NONEON-NOSVE-NEXT: str h1, [sp, #418] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #416] +; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB7_13 +; NONEON-NOSVE-NEXT: .LBB7_28: // %cond.load28 +; NONEON-NOSVE-NEXT: ldr h2, [x0, #20] +; NONEON-NOSVE-NEXT: str q1, [sp, #336] +; NONEON-NOSVE-NEXT: str h2, [sp, #368] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #368] +; NONEON-NOSVE-NEXT: str q1, [sp, #320] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #350] +; NONEON-NOSVE-NEXT: str h1, [sp, #366] +; NONEON-NOSVE-NEXT: ldur x10, [x9, #134] +; NONEON-NOSVE-NEXT: stur x10, [x9, #150] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #336] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #320] +; NONEON-NOSVE-NEXT: str w10, [sp, #352] +; NONEON-NOSVE-NEXT: str h1, [sp, #356] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #352] +; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB7_14 +; NONEON-NOSVE-NEXT: .LBB7_29: // %cond.load31 +; NONEON-NOSVE-NEXT: ldr h2, [x0, #22] +; NONEON-NOSVE-NEXT: str q1, [sp, #272] +; NONEON-NOSVE-NEXT: ldr x10, [sp, #280] +; NONEON-NOSVE-NEXT: str h2, [sp, #304] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #304] +; NONEON-NOSVE-NEXT: str x10, [sp, #296] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #272] +; NONEON-NOSVE-NEXT: str q1, [sp, #256] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #276] +; NONEON-NOSVE-NEXT: str w10, [sp, #288] +; NONEON-NOSVE-NEXT: str h1, [sp, #292] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #256] +; NONEON-NOSVE-NEXT: str h1, [sp, #294] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #288] +; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB7_15 +; NONEON-NOSVE-NEXT: .LBB7_30: // %cond.load34 +; NONEON-NOSVE-NEXT: ldr h2, [x0, #24] +; NONEON-NOSVE-NEXT: str q1, [sp, #208] +; NONEON-NOSVE-NEXT: str h2, [sp, #240] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #240] +; NONEON-NOSVE-NEXT: str q1, 
[sp, #192] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #222] +; NONEON-NOSVE-NEXT: str h1, [sp, #238] +; NONEON-NOSVE-NEXT: ldur w10, [x9, #10] +; NONEON-NOSVE-NEXT: stur w10, [x9, #26] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #208] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #192] +; NONEON-NOSVE-NEXT: str x9, [sp, #224] +; NONEON-NOSVE-NEXT: str h1, [sp, #232] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #224] +; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB7_16 +; NONEON-NOSVE-NEXT: .LBB7_31: // %cond.load37 +; NONEON-NOSVE-NEXT: ldr h2, [x0, #26] +; NONEON-NOSVE-NEXT: str q1, [sp, #144] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #156] +; NONEON-NOSVE-NEXT: str h2, [sp, #176] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #176] +; NONEON-NOSVE-NEXT: str w9, [sp, #172] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #144] +; NONEON-NOSVE-NEXT: str q1, [sp, #128] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #152] +; NONEON-NOSVE-NEXT: str x9, [sp, #160] +; NONEON-NOSVE-NEXT: str h1, [sp, #168] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #128] +; NONEON-NOSVE-NEXT: str h1, [sp, #170] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #160] +; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB7_17 +; NONEON-NOSVE-NEXT: .LBB7_32: // %cond.load40 +; NONEON-NOSVE-NEXT: ldr h2, [x0, #28] +; NONEON-NOSVE-NEXT: str q1, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #88] +; NONEON-NOSVE-NEXT: str h2, [sp, #112] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #112] +; NONEON-NOSVE-NEXT: str w9, [sp, #104] +; NONEON-NOSVE-NEXT: ldr x9, [sp, #80] +; NONEON-NOSVE-NEXT: str q1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #94] +; NONEON-NOSVE-NEXT: str x9, [sp, #96] +; NONEON-NOSVE-NEXT: str h1, [sp, #110] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #64] +; NONEON-NOSVE-NEXT: str h1, [sp, #108] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #96] +; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB7_18 +; NONEON-NOSVE-NEXT: b .LBB7_19 %load = call <16 x half> @llvm.masked.load.v16f16(ptr %src, i32 8, <16 x i1> %mask, <16 x half> zeroinitializer) ret <16 x half> %load } @@ -225,6 +2386,42 @@ define <2 x float> @masked_load_v2f32(ptr %src, <2 x i1> 
%mask) { ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: str d0, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #48] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: bfxil w8, w9, #0, #1 +; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB8_2 +; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load +; NONEON-NOSVE-NEXT: ldr s0, [x0] +; NONEON-NOSVE-NEXT: str wzr, [sp, #44] +; NONEON-NOSVE-NEXT: str s0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB8_3 +; NONEON-NOSVE-NEXT: b .LBB8_4 +; NONEON-NOSVE-NEXT: .LBB8_2: +; NONEON-NOSVE-NEXT: adrp x9, .LCPI8_0 +; NONEON-NOSVE-NEXT: ldr d0, [x9, :lo12:.LCPI8_0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB8_4 +; NONEON-NOSVE-NEXT: .LBB8_3: // %cond.load1 +; NONEON-NOSVE-NEXT: ldr s1, [x0, #4] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: str s1, [sp, #32] +; NONEON-NOSVE-NEXT: ldr s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #32] +; NONEON-NOSVE-NEXT: str d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: stp s1, s0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: .LBB8_4: // %else2 +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %load = call <2 x float> @llvm.masked.load.v2f32(ptr %src, i32 8, <2 x i1> %mask, <2 x float> zeroinitializer) ret <2 x float> %load } @@ -241,6 +2438,84 @@ define <4 x float> @masked_load_v4f32(ptr %src, <4 x i1> %mask) { ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #224 +; NONEON-NOSVE-NEXT: 
.cfi_def_cfa_offset 224 +; NONEON-NOSVE-NEXT: str d0, [sp, #208] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #210] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #212] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #214] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #208] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: and w9, w9, #0x4 +; NONEON-NOSVE-NEXT: and w10, w10, #0x8 +; NONEON-NOSVE-NEXT: bfxil w8, w11, #0, #1 +; NONEON-NOSVE-NEXT: orr w9, w9, w10 +; NONEON-NOSVE-NEXT: orr w8, w8, w9 +; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB9_2 +; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load +; NONEON-NOSVE-NEXT: ldr s0, [x0] +; NONEON-NOSVE-NEXT: str wzr, [sp, #204] +; NONEON-NOSVE-NEXT: stur xzr, [sp, #196] +; NONEON-NOSVE-NEXT: str s0, [sp, #192] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #192] +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB9_3 +; NONEON-NOSVE-NEXT: b .LBB9_4 +; NONEON-NOSVE-NEXT: .LBB9_2: +; NONEON-NOSVE-NEXT: adrp x9, .LCPI9_0 +; NONEON-NOSVE-NEXT: ldr q0, [x9, :lo12:.LCPI9_0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB9_4 +; NONEON-NOSVE-NEXT: .LBB9_3: // %cond.load1 +; NONEON-NOSVE-NEXT: ldr s1, [x0, #4] +; NONEON-NOSVE-NEXT: str q0, [sp, #144] +; NONEON-NOSVE-NEXT: str s1, [sp, #176] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #176] +; NONEON-NOSVE-NEXT: str q0, [sp, #128] +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #152] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #168] +; NONEON-NOSVE-NEXT: ldr s1, [sp, #144] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #128] +; NONEON-NOSVE-NEXT: stp s1, s0, [sp, #160] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #160] +; NONEON-NOSVE-NEXT: .LBB9_4: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB9_7 +; NONEON-NOSVE-NEXT: // %bb.5: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB9_8 +; NONEON-NOSVE-NEXT: .LBB9_6: // %else8 +; NONEON-NOSVE-NEXT: add sp, sp, #224 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB9_7: // %cond.load4 +; NONEON-NOSVE-NEXT: ldr s1, [x0, #8] +; 
NONEON-NOSVE-NEXT: str q0, [sp, #80] +; NONEON-NOSVE-NEXT: str s1, [sp, #112] +; NONEON-NOSVE-NEXT: ldr s1, [sp, #92] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #112] +; NONEON-NOSVE-NEXT: str q0, [sp, #64] +; NONEON-NOSVE-NEXT: ldp s0, s2, [sp, #80] +; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #96] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #64] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #104] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #96] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB9_6 +; NONEON-NOSVE-NEXT: .LBB9_8: // %cond.load7 +; NONEON-NOSVE-NEXT: ldr s1, [x0, #12] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: str s1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: stp s1, s0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #224 +; NONEON-NOSVE-NEXT: ret %load = call <4 x float> @llvm.masked.load.v4f32(ptr %src, i32 8, <4 x i1> %mask, <4 x float> zeroinitializer) ret <4 x float> %load } @@ -290,6 +2565,173 @@ define <8 x float> @masked_load_v8f32(ptr %src, <8 x i1> %mask) { ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #496 +; NONEON-NOSVE-NEXT: str x29, [sp, #480] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 496 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: str d0, [sp, #464] +; NONEON-NOSVE-NEXT: adrp x8, .LCPI10_0 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #466] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #467] +; NONEON-NOSVE-NEXT: ldrb w11, [sp, #465] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #468] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #469] +; NONEON-NOSVE-NEXT: ldrb w15, [sp, #470] +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: sbfx 
w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w11, w11, #0, #1 +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #464] +; NONEON-NOSVE-NEXT: sbfx w13, w13, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w14, w14, #0, #1 +; NONEON-NOSVE-NEXT: and w9, w9, #0x4 +; NONEON-NOSVE-NEXT: and w10, w10, #0x8 +; NONEON-NOSVE-NEXT: sbfx w15, w15, #0, #1 +; NONEON-NOSVE-NEXT: orr w9, w9, w10 +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #471] +; NONEON-NOSVE-NEXT: and w11, w11, #0x2 +; NONEON-NOSVE-NEXT: and w13, w13, #0x10 +; NONEON-NOSVE-NEXT: bfxil w11, w12, #0, #1 +; NONEON-NOSVE-NEXT: and w12, w14, #0x20 +; NONEON-NOSVE-NEXT: orr w9, w9, w13 +; NONEON-NOSVE-NEXT: and w13, w15, #0x40 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: orr w9, w11, w9 +; NONEON-NOSVE-NEXT: orr w11, w12, w13 +; NONEON-NOSVE-NEXT: ldr q1, [x8, :lo12:.LCPI10_0] +; NONEON-NOSVE-NEXT: orr w9, w9, w11 +; NONEON-NOSVE-NEXT: and w10, w10, #0x80 +; NONEON-NOSVE-NEXT: add w10, w9, w10 +; NONEON-NOSVE-NEXT: add x9, sp, #208 +; NONEON-NOSVE-NEXT: and w8, w10, #0xff +; NONEON-NOSVE-NEXT: tbz w10, #0, .LBB10_2 +; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load +; NONEON-NOSVE-NEXT: ldr s0, [x0] +; NONEON-NOSVE-NEXT: str wzr, [sp, #460] +; NONEON-NOSVE-NEXT: stur xzr, [x9, #244] +; NONEON-NOSVE-NEXT: str s0, [sp, #448] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #448] +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB10_3 +; NONEON-NOSVE-NEXT: b .LBB10_4 +; NONEON-NOSVE-NEXT: .LBB10_2: +; NONEON-NOSVE-NEXT: mov v0.16b, v1.16b +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB10_4 +; NONEON-NOSVE-NEXT: .LBB10_3: // %cond.load1 +; NONEON-NOSVE-NEXT: ldr s2, [x0, #4] +; NONEON-NOSVE-NEXT: str q0, [sp, #400] +; NONEON-NOSVE-NEXT: str s2, [sp, #432] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #432] +; NONEON-NOSVE-NEXT: str q0, [sp, #384] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #412] +; NONEON-NOSVE-NEXT: str s0, [sp, #428] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #408] +; NONEON-NOSVE-NEXT: str s0, [sp, #424] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #400] +; NONEON-NOSVE-NEXT: str s0, [sp, 
#416] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #384] +; NONEON-NOSVE-NEXT: str s0, [sp, #420] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #416] +; NONEON-NOSVE-NEXT: .LBB10_4: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB10_12 +; NONEON-NOSVE-NEXT: // %bb.5: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB10_13 +; NONEON-NOSVE-NEXT: .LBB10_6: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB10_14 +; NONEON-NOSVE-NEXT: .LBB10_7: // %else11 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB10_15 +; NONEON-NOSVE-NEXT: .LBB10_8: // %else14 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB10_16 +; NONEON-NOSVE-NEXT: .LBB10_9: // %else17 +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB10_11 +; NONEON-NOSVE-NEXT: .LBB10_10: // %cond.load19 +; NONEON-NOSVE-NEXT: ldr s2, [x0, #28] +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: str s2, [sp, #48] +; NONEON-NOSVE-NEXT: ldr s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #48] +; NONEON-NOSVE-NEXT: str x8, [sp, #32] +; NONEON-NOSVE-NEXT: str q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr s1, [sp, #16] +; NONEON-NOSVE-NEXT: stp s2, s1, [sp, #40] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #32] +; NONEON-NOSVE-NEXT: .LBB10_11: // %else20 +; NONEON-NOSVE-NEXT: ldr x29, [sp, #480] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: add sp, sp, #496 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB10_12: // %cond.load4 +; NONEON-NOSVE-NEXT: ldr s2, [x0, #8] +; NONEON-NOSVE-NEXT: str q0, [sp, #336] +; NONEON-NOSVE-NEXT: str s2, [sp, #368] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #368] +; NONEON-NOSVE-NEXT: str q0, [sp, #320] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #348] +; NONEON-NOSVE-NEXT: str s0, [sp, #364] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #340] +; NONEON-NOSVE-NEXT: str s0, [sp, #356] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #336] +; NONEON-NOSVE-NEXT: str s0, [sp, #352] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #320] +; NONEON-NOSVE-NEXT: str s0, [sp, #360] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #352] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB10_6 +; NONEON-NOSVE-NEXT: 
.LBB10_13: // %cond.load7 +; NONEON-NOSVE-NEXT: ldr s2, [x0, #12] +; NONEON-NOSVE-NEXT: str q0, [sp, #256] +; NONEON-NOSVE-NEXT: ldr x10, [sp, #256] +; NONEON-NOSVE-NEXT: str s2, [sp, #304] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #304] +; NONEON-NOSVE-NEXT: str x10, [sp, #288] +; NONEON-NOSVE-NEXT: str q0, [sp, #272] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #264] +; NONEON-NOSVE-NEXT: str s0, [sp, #296] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #272] +; NONEON-NOSVE-NEXT: str s0, [sp, #300] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #288] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB10_7 +; NONEON-NOSVE-NEXT: .LBB10_14: // %cond.load10 +; NONEON-NOSVE-NEXT: ldr s2, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [sp, #208] +; NONEON-NOSVE-NEXT: str s2, [sp, #240] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #240] +; NONEON-NOSVE-NEXT: str q1, [sp, #192] +; NONEON-NOSVE-NEXT: ldr s1, [sp, #220] +; NONEON-NOSVE-NEXT: str s1, [sp, #236] +; NONEON-NOSVE-NEXT: ldur x10, [x9, #4] +; NONEON-NOSVE-NEXT: stur x10, [x9, #20] +; NONEON-NOSVE-NEXT: ldr s1, [sp, #192] +; NONEON-NOSVE-NEXT: str s1, [sp, #224] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #224] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB10_8 +; NONEON-NOSVE-NEXT: .LBB10_15: // %cond.load13 +; NONEON-NOSVE-NEXT: ldr s2, [x0, #20] +; NONEON-NOSVE-NEXT: str q1, [sp, #144] +; NONEON-NOSVE-NEXT: str s2, [sp, #176] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #176] +; NONEON-NOSVE-NEXT: str q1, [sp, #128] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #152] +; NONEON-NOSVE-NEXT: stp s1, s2, [sp, #168] +; NONEON-NOSVE-NEXT: ldr s2, [sp, #144] +; NONEON-NOSVE-NEXT: ldr s1, [sp, #128] +; NONEON-NOSVE-NEXT: stp s2, s1, [sp, #160] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #160] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB10_9 +; NONEON-NOSVE-NEXT: .LBB10_16: // %cond.load16 +; NONEON-NOSVE-NEXT: ldr s2, [x0, #24] +; NONEON-NOSVE-NEXT: str q1, [sp, #80] +; NONEON-NOSVE-NEXT: str s2, [sp, #112] +; NONEON-NOSVE-NEXT: ldr s2, [sp, #92] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #112] +; NONEON-NOSVE-NEXT: str q1, [sp, #64] +; 
NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #80] +; NONEON-NOSVE-NEXT: stp s1, s3, [sp, #96] +; NONEON-NOSVE-NEXT: ldr s1, [sp, #64] +; NONEON-NOSVE-NEXT: stp s1, s2, [sp, #104] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #96] +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB10_10 +; NONEON-NOSVE-NEXT: b .LBB10_11 %load = call <8 x float> @llvm.masked.load.v8f32(ptr %src, i32 8, <8 x i1> %mask, <8 x float> zeroinitializer) ret <8 x float> %load } @@ -306,6 +2748,42 @@ define <2 x double> @masked_load_v2f64(ptr %src, <2 x i1> %mask) { ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: str d0, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #80] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: bfxil w8, w9, #0, #1 +; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB11_2 +; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str xzr, [sp, #72] +; NONEON-NOSVE-NEXT: str d0, [sp, #64] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #64] +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB11_3 +; NONEON-NOSVE-NEXT: b .LBB11_4 +; NONEON-NOSVE-NEXT: .LBB11_2: +; NONEON-NOSVE-NEXT: adrp x9, .LCPI11_0 +; NONEON-NOSVE-NEXT: ldr q0, [x9, :lo12:.LCPI11_0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB11_4 +; NONEON-NOSVE-NEXT: .LBB11_3: // %cond.load1 +; NONEON-NOSVE-NEXT: ldr d1, [x0, #8] +; NONEON-NOSVE-NEXT: str q0, [sp] +; NONEON-NOSVE-NEXT: str d1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr d1, [sp] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: .LBB11_4: // %else2 +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; 
NONEON-NOSVE-NEXT: ret %load = call <2 x double> @llvm.masked.load.v2f64(ptr %src, i32 8, <2 x i1> %mask, <2 x double> zeroinitializer) ret <2 x double> %load } @@ -331,6 +2809,78 @@ define <4 x double> @masked_load_v4f64(ptr %src, <4 x i1> %mask) { ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, x8, lsl #3] ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #224 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 224 +; NONEON-NOSVE-NEXT: str d0, [sp, #208] +; NONEON-NOSVE-NEXT: adrp x9, .LCPI12_0 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #210] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #212] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #214] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #208] +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI12_0] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w11, w11, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: and w10, w10, #0x4 +; NONEON-NOSVE-NEXT: and w11, w11, #0x8 +; NONEON-NOSVE-NEXT: bfxil w8, w12, #0, #1 +; NONEON-NOSVE-NEXT: orr w10, w10, w11 +; NONEON-NOSVE-NEXT: orr w8, w8, w10 +; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB12_2 +; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str xzr, [sp, #200] +; NONEON-NOSVE-NEXT: str d0, [sp, #192] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #192] +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB12_3 +; NONEON-NOSVE-NEXT: b .LBB12_4 +; NONEON-NOSVE-NEXT: .LBB12_2: +; NONEON-NOSVE-NEXT: mov v0.16b, v1.16b +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB12_4 +; NONEON-NOSVE-NEXT: .LBB12_3: // %cond.load1 +; NONEON-NOSVE-NEXT: ldr d2, [x0, #8] +; NONEON-NOSVE-NEXT: str q0, [sp, #128] +; NONEON-NOSVE-NEXT: str d2, [sp, #176] +; NONEON-NOSVE-NEXT: ldr d2, [sp, #128] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #176] +; NONEON-NOSVE-NEXT: str q0, [sp, #144] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #144] +; 
NONEON-NOSVE-NEXT: stp d2, d0, [sp, #160] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #160] +; NONEON-NOSVE-NEXT: .LBB12_4: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB12_7 +; NONEON-NOSVE-NEXT: // %bb.5: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB12_8 +; NONEON-NOSVE-NEXT: .LBB12_6: // %else8 +; NONEON-NOSVE-NEXT: add sp, sp, #224 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB12_7: // %cond.load4 +; NONEON-NOSVE-NEXT: ldr d2, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [sp, #80] +; NONEON-NOSVE-NEXT: str d2, [sp, #112] +; NONEON-NOSVE-NEXT: ldr d2, [sp, #88] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #112] +; NONEON-NOSVE-NEXT: str q1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr d1, [sp, #64] +; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #96] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #96] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB12_6 +; NONEON-NOSVE-NEXT: .LBB12_8: // %cond.load7 +; NONEON-NOSVE-NEXT: ldr d2, [x0, #24] +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: str d2, [sp, #48] +; NONEON-NOSVE-NEXT: ldr d2, [sp] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #48] +; NONEON-NOSVE-NEXT: str q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr d1, [sp, #16] +; NONEON-NOSVE-NEXT: stp d2, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #224 +; NONEON-NOSVE-NEXT: ret %load = call <4 x double> @llvm.masked.load.v4f64(ptr %src, i32 8, <4 x i1> %mask, <4 x double> zeroinitializer) ret <4 x double> %load } @@ -356,6 +2906,55 @@ define <3 x i32> @masked_load_zext_v3i32(ptr %load_ptr, <3 x i1> %pm) { ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_zext_v3i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: and w8, w1, #0x1 +; NONEON-NOSVE-NEXT: bfi w8, w2, #1, #1 +; NONEON-NOSVE-NEXT: bfi w8, w3, #2, #1 +; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB13_2 +; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load +; 
NONEON-NOSVE-NEXT: ldrh w9, [x0] +; NONEON-NOSVE-NEXT: stur wzr, [sp, #66] +; NONEON-NOSVE-NEXT: strh w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #64] +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB13_3 +; NONEON-NOSVE-NEXT: b .LBB13_4 +; NONEON-NOSVE-NEXT: .LBB13_2: +; NONEON-NOSVE-NEXT: adrp x9, .LCPI13_0 +; NONEON-NOSVE-NEXT: ldr d0, [x9, :lo12:.LCPI13_0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB13_4 +; NONEON-NOSVE-NEXT: .LBB13_3: // %cond.load1 +; NONEON-NOSVE-NEXT: ldrh w9, [x0, #2] +; NONEON-NOSVE-NEXT: str d0, [sp, #48] +; NONEON-NOSVE-NEXT: strh w9, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #52] +; NONEON-NOSVE-NEXT: strh w9, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #48] +; NONEON-NOSVE-NEXT: strh w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #56] +; NONEON-NOSVE-NEXT: .LBB13_4: // %else2 +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB13_6 +; NONEON-NOSVE-NEXT: // %bb.5: // %cond.load4 +; NONEON-NOSVE-NEXT: ldrh w8, [x0, #4] +; NONEON-NOSVE-NEXT: str d0, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: .LBB13_6: // %else5 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %load_value = tail call <3 x i16> @llvm.masked.load.v3i16.p0(ptr %load_ptr, i32 4, <3 x i1> %pm, <3 x i16> zeroinitializer) %extend = zext <3 x i16> %load_value to <3 x i32> ret <3 x i32> %extend; @@ -382,6 +2981,55 @@ define <3 x i32> @masked_load_sext_v3i32(ptr %load_ptr, <3 x i1> %pm) { ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
masked_load_sext_v3i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: and w8, w1, #0x1 +; NONEON-NOSVE-NEXT: bfi w8, w2, #1, #1 +; NONEON-NOSVE-NEXT: bfi w8, w3, #2, #1 +; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB14_2 +; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load +; NONEON-NOSVE-NEXT: ldrh w9, [x0] +; NONEON-NOSVE-NEXT: stur wzr, [sp, #66] +; NONEON-NOSVE-NEXT: strh w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #64] +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB14_3 +; NONEON-NOSVE-NEXT: b .LBB14_4 +; NONEON-NOSVE-NEXT: .LBB14_2: +; NONEON-NOSVE-NEXT: adrp x9, .LCPI14_0 +; NONEON-NOSVE-NEXT: ldr d0, [x9, :lo12:.LCPI14_0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB14_4 +; NONEON-NOSVE-NEXT: .LBB14_3: // %cond.load1 +; NONEON-NOSVE-NEXT: ldrh w9, [x0, #2] +; NONEON-NOSVE-NEXT: str d0, [sp, #48] +; NONEON-NOSVE-NEXT: strh w9, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #52] +; NONEON-NOSVE-NEXT: strh w9, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #48] +; NONEON-NOSVE-NEXT: strh w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #56] +; NONEON-NOSVE-NEXT: .LBB14_4: // %else2 +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB14_6 +; NONEON-NOSVE-NEXT: // %bb.5: // %cond.load4 +; NONEON-NOSVE-NEXT: ldrh w8, [x0, #4] +; NONEON-NOSVE-NEXT: str d0, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: .LBB14_6: // %else5 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %load_value = tail call <3 x i16> @llvm.masked.load.v3i16.p0(ptr 
%load_ptr, i32 4, <3 x i1> %pm, <3 x i16> zeroinitializer) %extend = sext <3 x i16> %load_value to <3 x i32> ret <3 x i32> %extend; diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll index bd6b96889b4cc..a79ce9db9abfd 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -19,6 +20,47 @@ define void @masked_store_v4i8(ptr %dst, <4 x i1> %mask) { ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: st1b { z0.h }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w11, [sp] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: and w9, w9, #0x4 +; NONEON-NOSVE-NEXT: and w10, w10, #0x8 +; NONEON-NOSVE-NEXT: bfxil w8, w11, #0, #1 +; NONEON-NOSVE-NEXT: orr w9, w9, w10 +; NONEON-NOSVE-NEXT: orr w8, w8, w9 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB0_5 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB0_6 +; NONEON-NOSVE-NEXT: .LBB0_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB0_7 +; NONEON-NOSVE-NEXT: .LBB0_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB0_8 +; NONEON-NOSVE-NEXT: .LBB0_4: // %else6 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB0_5: // %cond.store +; NONEON-NOSVE-NEXT: strb wzr, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB0_2 +; NONEON-NOSVE-NEXT: .LBB0_6: // %cond.store1 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #1] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB0_3 +; NONEON-NOSVE-NEXT: .LBB0_7: // %cond.store3 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #2] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB0_4 +; NONEON-NOSVE-NEXT: .LBB0_8: // %cond.store5 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #3] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v4i8(<4 x i8> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask) ret void } @@ -34,6 +76,84 @@ define void @masked_store_v8i8(ptr %dst, <8 x i1> %mask) { ; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #4] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #5] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #6] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldrb w11, [sp] +; NONEON-NOSVE-NEXT: sbfx w12, w12, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w13, w13, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x4 +; NONEON-NOSVE-NEXT: and w9, w9, #0x8 +; NONEON-NOSVE-NEXT: sbfx w14, w14, #0, #1 +; NONEON-NOSVE-NEXT: orr w8, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: and w10, w10, #0x2 +; NONEON-NOSVE-NEXT: and w12, w12, #0x10 +; NONEON-NOSVE-NEXT: bfxil w10, w11, #0, #1 +; NONEON-NOSVE-NEXT: and w11, w13, #0x20 +; NONEON-NOSVE-NEXT: orr w8, w8, w12 +; NONEON-NOSVE-NEXT: and w12, w14, #0x40 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: orr w8, w10, w8 +; NONEON-NOSVE-NEXT: orr w10, w11, w12 +; NONEON-NOSVE-NEXT: orr w8, w8, w10 +; NONEON-NOSVE-NEXT: and w9, w9, #0x80 +; NONEON-NOSVE-NEXT: add w9, w8, w9 +; NONEON-NOSVE-NEXT: and w8, w9, #0xff +; NONEON-NOSVE-NEXT: tbnz w9, #0, .LBB1_9 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB1_10 +; NONEON-NOSVE-NEXT: .LBB1_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB1_11 +; NONEON-NOSVE-NEXT: .LBB1_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB1_12 +; NONEON-NOSVE-NEXT: .LBB1_4: // %else6 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB1_13 +; NONEON-NOSVE-NEXT: .LBB1_5: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB1_14 +; NONEON-NOSVE-NEXT: .LBB1_6: // %else10 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB1_15 +; NONEON-NOSVE-NEXT: .LBB1_7: // %else12 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB1_16 +; NONEON-NOSVE-NEXT: .LBB1_8: // %else14 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; 
NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB1_9: // %cond.store +; NONEON-NOSVE-NEXT: strb wzr, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB1_2 +; NONEON-NOSVE-NEXT: .LBB1_10: // %cond.store1 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #1] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB1_3 +; NONEON-NOSVE-NEXT: .LBB1_11: // %cond.store3 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #2] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB1_4 +; NONEON-NOSVE-NEXT: .LBB1_12: // %cond.store5 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #3] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB1_5 +; NONEON-NOSVE-NEXT: .LBB1_13: // %cond.store7 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #4] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB1_6 +; NONEON-NOSVE-NEXT: .LBB1_14: // %cond.store9 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #5] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB1_7 +; NONEON-NOSVE-NEXT: .LBB1_15: // %cond.store11 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #6] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB1_8 +; NONEON-NOSVE-NEXT: .LBB1_16: // %cond.store13 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #7] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v8i8(<8 x i8> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask) ret void } @@ -49,6 +169,175 @@ define void @masked_store_v16i8(ptr %dst, <16 x i1> %mask) { ; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: and w8, w8, #0x1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: and w8, w8, #0x1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x80 +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x40 +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x20 +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x10 +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x4 +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x80 +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x40 +; NONEON-NOSVE-NEXT: strb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x20 +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; 
NONEON-NOSVE-NEXT: and w8, w8, #0x10 +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x4 +; NONEON-NOSVE-NEXT: strb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: str q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #44] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: add w9, w10, w11 +; NONEON-NOSVE-NEXT: add w10, w12, w13 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: add w9, w10, w14 +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #46] +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: add w8, w8, w10 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB2_17 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB2_18 +; NONEON-NOSVE-NEXT: .LBB2_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB2_19 +; NONEON-NOSVE-NEXT: .LBB2_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB2_20 +; NONEON-NOSVE-NEXT: .LBB2_4: // %else6 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB2_21 +; NONEON-NOSVE-NEXT: .LBB2_5: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB2_22 +; NONEON-NOSVE-NEXT: .LBB2_6: // %else10 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB2_23 +; NONEON-NOSVE-NEXT: .LBB2_7: // %else12 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB2_24 +; 
NONEON-NOSVE-NEXT: .LBB2_8: // %else14 +; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB2_25 +; NONEON-NOSVE-NEXT: .LBB2_9: // %else16 +; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB2_26 +; NONEON-NOSVE-NEXT: .LBB2_10: // %else18 +; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB2_27 +; NONEON-NOSVE-NEXT: .LBB2_11: // %else20 +; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB2_28 +; NONEON-NOSVE-NEXT: .LBB2_12: // %else22 +; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB2_29 +; NONEON-NOSVE-NEXT: .LBB2_13: // %else24 +; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB2_30 +; NONEON-NOSVE-NEXT: .LBB2_14: // %else26 +; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB2_31 +; NONEON-NOSVE-NEXT: .LBB2_15: // %else28 +; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB2_32 +; NONEON-NOSVE-NEXT: .LBB2_16: // %else30 +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB2_17: // %cond.store +; NONEON-NOSVE-NEXT: strb wzr, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB2_2 +; NONEON-NOSVE-NEXT: .LBB2_18: // %cond.store1 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #1] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB2_3 +; NONEON-NOSVE-NEXT: .LBB2_19: // %cond.store3 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #2] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB2_4 +; NONEON-NOSVE-NEXT: .LBB2_20: // %cond.store5 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #3] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB2_5 +; NONEON-NOSVE-NEXT: .LBB2_21: // %cond.store7 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #4] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB2_6 +; NONEON-NOSVE-NEXT: .LBB2_22: // %cond.store9 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #5] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB2_7 +; NONEON-NOSVE-NEXT: .LBB2_23: // %cond.store11 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #6] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB2_8 +; NONEON-NOSVE-NEXT: .LBB2_24: // %cond.store13 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #7] +; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB2_9 +; NONEON-NOSVE-NEXT: .LBB2_25: // %cond.store15 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #8] +; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB2_10 +; 
NONEON-NOSVE-NEXT: .LBB2_26: // %cond.store17 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #9] +; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB2_11 +; NONEON-NOSVE-NEXT: .LBB2_27: // %cond.store19 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #10] +; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB2_12 +; NONEON-NOSVE-NEXT: .LBB2_28: // %cond.store21 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #11] +; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB2_13 +; NONEON-NOSVE-NEXT: .LBB2_29: // %cond.store23 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #12] +; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB2_14 +; NONEON-NOSVE-NEXT: .LBB2_30: // %cond.store25 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #13] +; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB2_15 +; NONEON-NOSVE-NEXT: .LBB2_31: // %cond.store27 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #14] +; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB2_16 +; NONEON-NOSVE-NEXT: .LBB2_32: // %cond.store29 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #15] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v16i8(<16 x i8> zeroinitializer, ptr %dst, i32 8, <16 x i1> %mask) ret void } @@ -129,6 +418,331 @@ define void @masked_store_v32i8(ptr %dst, <32 x i1> %mask) { ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #216] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #152] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #272] +; NONEON-NOSVE-NEXT: ldr w11, [sp, #176] +; NONEON-NOSVE-NEXT: ldr w12, [sp, #160] +; NONEON-NOSVE-NEXT: and w8, w8, #0x1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: and w8, w9, #0x1 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #264] +; NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: sbfx w11, w11, #0, #1 +; 
NONEON-NOSVE-NEXT: and w8, w9, #0x80 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #256] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: and w8, w9, #0x40 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #248] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: and w8, w9, #0x20 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #240] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: and w8, w9, #0x10 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #232] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: and w8, w9, #0x8 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #224] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: and w8, w9, #0x4 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #208] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: and w8, w9, #0x2 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #200] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: and w8, w9, #0x80 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #192] +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #184] +; NONEON-NOSVE-NEXT: and w9, w9, #0x40 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: strb w9, [sp, #22] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #168] +; NONEON-NOSVE-NEXT: and w10, w10, #0x20 +; NONEON-NOSVE-NEXT: and w8, w8, #0x10 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: strb w10, [sp, #21] +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: and w8, w11, #0x8 +; NONEON-NOSVE-NEXT: sbfx w10, w12, #0, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: and w8, w9, #0x4 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #88] +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #18] +; NONEON-NOSVE-NEXT: and w8, w10, #0x2 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #136] +; NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; NONEON-NOSVE-NEXT: and w8, w9, #0x1 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #144] +; NONEON-NOSVE-NEXT: strb w8, [sp, #8] +; NONEON-NOSVE-NEXT: and w8, w1, #0x1 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #104] +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp] +; NONEON-NOSVE-NEXT: ldr w12, [sp, #80] +; NONEON-NOSVE-NEXT: sbfx w11, w11, #0, #1 +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: and w8, w9, #0x80 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #128] +; NONEON-NOSVE-NEXT: strb w8, [sp, #15] +; NONEON-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; NONEON-NOSVE-NEXT: and w8, w9, #0x40 +; NONEON-NOSVE-NEXT: sbfx w9, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #120] +; NONEON-NOSVE-NEXT: strb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] +; NONEON-NOSVE-NEXT: and w9, w9, #0x20 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: strb w9, [sp, #13] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #96] +; NONEON-NOSVE-NEXT: and w10, w10, #0x10 +; NONEON-NOSVE-NEXT: zip1 v2.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: and w8, w8, #0x8 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: strb w10, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #11] +; NONEON-NOSVE-NEXT: and w8, w11, #0x4 +; NONEON-NOSVE-NEXT: sbfx w10, w12, #0, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #10] +; NONEON-NOSVE-NEXT: and w8, w9, #0x2 +; NONEON-NOSVE-NEXT: sbfx w9, w7, #0, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #9] +; NONEON-NOSVE-NEXT: and w8, w10, #0x80 +; NONEON-NOSVE-NEXT: sbfx w10, w6, #0, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #7] +; NONEON-NOSVE-NEXT: and w8, w9, #0x40 +; NONEON-NOSVE-NEXT: sbfx w9, w5, #0, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #6] +; NONEON-NOSVE-NEXT: and w8, w10, #0x20 +; 
NONEON-NOSVE-NEXT: sbfx w10, w4, #0, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #5] +; NONEON-NOSVE-NEXT: and w8, w9, #0x10 +; NONEON-NOSVE-NEXT: sbfx w9, w3, #0, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #4] +; NONEON-NOSVE-NEXT: and w8, w10, #0x8 +; NONEON-NOSVE-NEXT: sbfx w10, w2, #0, #1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #3] +; NONEON-NOSVE-NEXT: and w8, w9, #0x4 +; NONEON-NOSVE-NEXT: strb w8, [sp, #2] +; NONEON-NOSVE-NEXT: and w8, w10, #0x2 +; NONEON-NOSVE-NEXT: strb w8, [sp, #1] +; NONEON-NOSVE-NEXT: ldr q0, [sp] +; NONEON-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q2, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #58] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #40] +; NONEON-NOSVE-NEXT: add w9, w10, w11 +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #34] +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: add w9, w12, w13 +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #42] +; NONEON-NOSVE-NEXT: add w9, w9, w10 +; NONEON-NOSVE-NEXT: add w10, w12, w11 +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #44] +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: add w12, w13, w14 +; NONEON-NOSVE-NEXT: add w14, w15, w16 +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #62] +; NONEON-NOSVE-NEXT: add w10, w10, w12 +; NONEON-NOSVE-NEXT: add w11, w14, w11 +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #46] +; NONEON-NOSVE-NEXT: add w9, w10, w11 +; NONEON-NOSVE-NEXT: add w10, w8, w13 +; NONEON-NOSVE-NEXT: add w8, w9, w12 +; NONEON-NOSVE-NEXT: bfi w8, w10, #16, #16 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB3_34 +; NONEON-NOSVE-NEXT: // %bb.1: 
// %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB3_35 +; NONEON-NOSVE-NEXT: .LBB3_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB3_36 +; NONEON-NOSVE-NEXT: .LBB3_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB3_37 +; NONEON-NOSVE-NEXT: .LBB3_4: // %else6 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB3_38 +; NONEON-NOSVE-NEXT: .LBB3_5: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB3_39 +; NONEON-NOSVE-NEXT: .LBB3_6: // %else10 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB3_40 +; NONEON-NOSVE-NEXT: .LBB3_7: // %else12 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB3_41 +; NONEON-NOSVE-NEXT: .LBB3_8: // %else14 +; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB3_42 +; NONEON-NOSVE-NEXT: .LBB3_9: // %else16 +; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB3_43 +; NONEON-NOSVE-NEXT: .LBB3_10: // %else18 +; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB3_44 +; NONEON-NOSVE-NEXT: .LBB3_11: // %else20 +; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB3_45 +; NONEON-NOSVE-NEXT: .LBB3_12: // %else22 +; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB3_46 +; NONEON-NOSVE-NEXT: .LBB3_13: // %else24 +; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB3_47 +; NONEON-NOSVE-NEXT: .LBB3_14: // %else26 +; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB3_48 +; NONEON-NOSVE-NEXT: .LBB3_15: // %else28 +; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB3_49 +; NONEON-NOSVE-NEXT: .LBB3_16: // %else30 +; NONEON-NOSVE-NEXT: tbnz w8, #16, .LBB3_50 +; NONEON-NOSVE-NEXT: .LBB3_17: // %else32 +; NONEON-NOSVE-NEXT: tbnz w8, #17, .LBB3_51 +; NONEON-NOSVE-NEXT: .LBB3_18: // %else34 +; NONEON-NOSVE-NEXT: tbnz w8, #18, .LBB3_52 +; NONEON-NOSVE-NEXT: .LBB3_19: // %else36 +; NONEON-NOSVE-NEXT: tbnz w8, #19, .LBB3_53 +; NONEON-NOSVE-NEXT: .LBB3_20: // %else38 +; NONEON-NOSVE-NEXT: tbnz w8, #20, .LBB3_54 +; NONEON-NOSVE-NEXT: .LBB3_21: // %else40 +; NONEON-NOSVE-NEXT: tbnz w8, #21, .LBB3_55 +; NONEON-NOSVE-NEXT: .LBB3_22: // %else42 +; NONEON-NOSVE-NEXT: tbnz w8, #22, .LBB3_56 +; NONEON-NOSVE-NEXT: .LBB3_23: // %else44 +; NONEON-NOSVE-NEXT: tbnz w8, #23, .LBB3_57 +; NONEON-NOSVE-NEXT: 
.LBB3_24: // %else46 +; NONEON-NOSVE-NEXT: tbnz w8, #24, .LBB3_58 +; NONEON-NOSVE-NEXT: .LBB3_25: // %else48 +; NONEON-NOSVE-NEXT: tbnz w8, #25, .LBB3_59 +; NONEON-NOSVE-NEXT: .LBB3_26: // %else50 +; NONEON-NOSVE-NEXT: tbnz w8, #26, .LBB3_60 +; NONEON-NOSVE-NEXT: .LBB3_27: // %else52 +; NONEON-NOSVE-NEXT: tbnz w8, #27, .LBB3_61 +; NONEON-NOSVE-NEXT: .LBB3_28: // %else54 +; NONEON-NOSVE-NEXT: tbnz w8, #28, .LBB3_62 +; NONEON-NOSVE-NEXT: .LBB3_29: // %else56 +; NONEON-NOSVE-NEXT: tbnz w8, #29, .LBB3_63 +; NONEON-NOSVE-NEXT: .LBB3_30: // %else58 +; NONEON-NOSVE-NEXT: tbnz w8, #30, .LBB3_64 +; NONEON-NOSVE-NEXT: .LBB3_31: // %else60 +; NONEON-NOSVE-NEXT: tbz w8, #31, .LBB3_33 +; NONEON-NOSVE-NEXT: .LBB3_32: // %cond.store61 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #31] +; NONEON-NOSVE-NEXT: .LBB3_33: // %else62 +; NONEON-NOSVE-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB3_34: // %cond.store +; NONEON-NOSVE-NEXT: strb wzr, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB3_2 +; NONEON-NOSVE-NEXT: .LBB3_35: // %cond.store1 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #1] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB3_3 +; NONEON-NOSVE-NEXT: .LBB3_36: // %cond.store3 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #2] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB3_4 +; NONEON-NOSVE-NEXT: .LBB3_37: // %cond.store5 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #3] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB3_5 +; NONEON-NOSVE-NEXT: .LBB3_38: // %cond.store7 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #4] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB3_6 +; NONEON-NOSVE-NEXT: .LBB3_39: // %cond.store9 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #5] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB3_7 +; NONEON-NOSVE-NEXT: .LBB3_40: // %cond.store11 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #6] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB3_8 +; NONEON-NOSVE-NEXT: .LBB3_41: // %cond.store13 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #7] +; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB3_9 +; 
NONEON-NOSVE-NEXT: .LBB3_42: // %cond.store15 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #8] +; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB3_10 +; NONEON-NOSVE-NEXT: .LBB3_43: // %cond.store17 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #9] +; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB3_11 +; NONEON-NOSVE-NEXT: .LBB3_44: // %cond.store19 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #10] +; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB3_12 +; NONEON-NOSVE-NEXT: .LBB3_45: // %cond.store21 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #11] +; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB3_13 +; NONEON-NOSVE-NEXT: .LBB3_46: // %cond.store23 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #12] +; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB3_14 +; NONEON-NOSVE-NEXT: .LBB3_47: // %cond.store25 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #13] +; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB3_15 +; NONEON-NOSVE-NEXT: .LBB3_48: // %cond.store27 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #14] +; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB3_16 +; NONEON-NOSVE-NEXT: .LBB3_49: // %cond.store29 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #15] +; NONEON-NOSVE-NEXT: tbz w8, #16, .LBB3_17 +; NONEON-NOSVE-NEXT: .LBB3_50: // %cond.store31 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #16] +; NONEON-NOSVE-NEXT: tbz w8, #17, .LBB3_18 +; NONEON-NOSVE-NEXT: .LBB3_51: // %cond.store33 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #17] +; NONEON-NOSVE-NEXT: tbz w8, #18, .LBB3_19 +; NONEON-NOSVE-NEXT: .LBB3_52: // %cond.store35 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #18] +; NONEON-NOSVE-NEXT: tbz w8, #19, .LBB3_20 +; NONEON-NOSVE-NEXT: .LBB3_53: // %cond.store37 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #19] +; NONEON-NOSVE-NEXT: tbz w8, #20, .LBB3_21 +; NONEON-NOSVE-NEXT: .LBB3_54: // %cond.store39 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #20] +; NONEON-NOSVE-NEXT: tbz w8, #21, .LBB3_22 +; NONEON-NOSVE-NEXT: .LBB3_55: // %cond.store41 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #21] +; NONEON-NOSVE-NEXT: tbz w8, #22, .LBB3_23 +; NONEON-NOSVE-NEXT: .LBB3_56: // %cond.store43 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #22] +; NONEON-NOSVE-NEXT: 
tbz w8, #23, .LBB3_24 +; NONEON-NOSVE-NEXT: .LBB3_57: // %cond.store45 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #23] +; NONEON-NOSVE-NEXT: tbz w8, #24, .LBB3_25 +; NONEON-NOSVE-NEXT: .LBB3_58: // %cond.store47 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #24] +; NONEON-NOSVE-NEXT: tbz w8, #25, .LBB3_26 +; NONEON-NOSVE-NEXT: .LBB3_59: // %cond.store49 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #25] +; NONEON-NOSVE-NEXT: tbz w8, #26, .LBB3_27 +; NONEON-NOSVE-NEXT: .LBB3_60: // %cond.store51 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #26] +; NONEON-NOSVE-NEXT: tbz w8, #27, .LBB3_28 +; NONEON-NOSVE-NEXT: .LBB3_61: // %cond.store53 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #27] +; NONEON-NOSVE-NEXT: tbz w8, #28, .LBB3_29 +; NONEON-NOSVE-NEXT: .LBB3_62: // %cond.store55 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #28] +; NONEON-NOSVE-NEXT: tbz w8, #29, .LBB3_30 +; NONEON-NOSVE-NEXT: .LBB3_63: // %cond.store57 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #29] +; NONEON-NOSVE-NEXT: tbz w8, #30, .LBB3_31 +; NONEON-NOSVE-NEXT: .LBB3_64: // %cond.store59 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #30] +; NONEON-NOSVE-NEXT: tbnz w8, #31, .LBB3_32 +; NONEON-NOSVE-NEXT: b .LBB3_33 call void @llvm.masked.store.v32i8(<32 x i8> zeroinitializer, ptr %dst, i32 8, <32 x i1> %mask) ret void } @@ -154,6 +768,31 @@ define void @masked_store_v2f16(ptr %dst, <2 x i1> %mask) { ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: bfxil w8, w9, #0, #1 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB4_3 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB4_4 +; NONEON-NOSVE-NEXT: .LBB4_2: // %else2 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB4_3: // %cond.store +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB4_2 +; NONEON-NOSVE-NEXT: .LBB4_4: // %cond.store1 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #2] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v2f16(<2 x half> zeroinitializer, ptr %dst, i32 8, <2 x i1> %mask) ret void } @@ -169,6 +808,51 @@ define void @masked_store_v4f16(ptr %dst, <4 x i1> %mask) { ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w11, [sp] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: and w9, w9, #0x4 +; NONEON-NOSVE-NEXT: and w10, w10, #0x8 +; NONEON-NOSVE-NEXT: bfxil w8, w11, #0, #1 +; NONEON-NOSVE-NEXT: orr w9, w9, w10 +; NONEON-NOSVE-NEXT: orr w8, w8, w9 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB5_5 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB5_6 +; NONEON-NOSVE-NEXT: .LBB5_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB5_7 +; NONEON-NOSVE-NEXT: .LBB5_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB5_8 +; NONEON-NOSVE-NEXT: .LBB5_4: // %else6 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB5_5: // %cond.store +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB5_2 +; NONEON-NOSVE-NEXT: .LBB5_6: // %cond.store1 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #2] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB5_3 +; NONEON-NOSVE-NEXT: .LBB5_7: // %cond.store3 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #4] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB5_4 +; NONEON-NOSVE-NEXT: .LBB5_8: // %cond.store5 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #6] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v4f16(<4 x half> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask) ret void } @@ -185,6 +869,92 @@ define void @masked_store_v8f16(ptr %dst, <8 x i1> %mask) { ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v8f16: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #4] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #5] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #6] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldrb w11, [sp] +; NONEON-NOSVE-NEXT: sbfx w12, w12, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w13, w13, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x4 +; NONEON-NOSVE-NEXT: and w9, w9, #0x8 +; NONEON-NOSVE-NEXT: sbfx w14, w14, #0, #1 +; NONEON-NOSVE-NEXT: orr w8, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: and w10, w10, #0x2 +; NONEON-NOSVE-NEXT: and w12, w12, #0x10 +; NONEON-NOSVE-NEXT: bfxil w10, w11, #0, #1 +; NONEON-NOSVE-NEXT: and w11, w13, #0x20 +; NONEON-NOSVE-NEXT: orr w8, w8, w12 +; NONEON-NOSVE-NEXT: and w12, w14, #0x40 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: orr w8, w10, w8 +; NONEON-NOSVE-NEXT: orr w10, w11, w12 +; NONEON-NOSVE-NEXT: orr w8, w8, w10 +; NONEON-NOSVE-NEXT: and w9, w9, #0x80 +; NONEON-NOSVE-NEXT: add w9, w8, w9 +; NONEON-NOSVE-NEXT: and w8, w9, #0xff +; NONEON-NOSVE-NEXT: tbnz w9, #0, .LBB6_9 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB6_10 +; NONEON-NOSVE-NEXT: .LBB6_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB6_11 +; NONEON-NOSVE-NEXT: .LBB6_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB6_12 +; NONEON-NOSVE-NEXT: .LBB6_4: // %else6 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB6_13 +; NONEON-NOSVE-NEXT: .LBB6_5: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB6_14 +; NONEON-NOSVE-NEXT: .LBB6_6: // %else10 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB6_15 +; NONEON-NOSVE-NEXT: .LBB6_7: // %else12 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB6_16 +; NONEON-NOSVE-NEXT: .LBB6_8: // %else14 +; 
NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB6_9: // %cond.store +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB6_2 +; NONEON-NOSVE-NEXT: .LBB6_10: // %cond.store1 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #2] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB6_3 +; NONEON-NOSVE-NEXT: .LBB6_11: // %cond.store3 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #4] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB6_4 +; NONEON-NOSVE-NEXT: .LBB6_12: // %cond.store5 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #6] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB6_5 +; NONEON-NOSVE-NEXT: .LBB6_13: // %cond.store7 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #8] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB6_6 +; NONEON-NOSVE-NEXT: .LBB6_14: // %cond.store9 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #10] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB6_7 +; NONEON-NOSVE-NEXT: .LBB6_15: // %cond.store11 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #12] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB6_8 +; NONEON-NOSVE-NEXT: .LBB6_16: // %cond.store13 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #14] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v8f16(<8 x half> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask) ret void } @@ -209,6 +979,191 @@ define void @masked_store_v16f16(ptr %dst, <16 x i1> %mask) { ; CHECK-NEXT: st1h { z1.h }, p1, [x0, x8, lsl #1] ; CHECK-NEXT: st1h { z1.h }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: and w8, w8, #0x1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: and w8, w8, #0x1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x80 +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x40 +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x20 +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x10 +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x4 +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x80 +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x40 +; NONEON-NOSVE-NEXT: strb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x20 +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; 
NONEON-NOSVE-NEXT: and w8, w8, #0x10 +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x4 +; NONEON-NOSVE-NEXT: strb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: str q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #44] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: add w9, w10, w11 +; NONEON-NOSVE-NEXT: add w10, w12, w13 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: add w9, w10, w14 +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #46] +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: add w8, w8, w10 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB7_17 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB7_18 +; NONEON-NOSVE-NEXT: .LBB7_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB7_19 +; NONEON-NOSVE-NEXT: .LBB7_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB7_20 +; NONEON-NOSVE-NEXT: .LBB7_4: // %else6 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB7_21 +; NONEON-NOSVE-NEXT: .LBB7_5: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB7_22 +; NONEON-NOSVE-NEXT: .LBB7_6: // %else10 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB7_23 +; NONEON-NOSVE-NEXT: .LBB7_7: // %else12 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB7_24 +; 
NONEON-NOSVE-NEXT: .LBB7_8: // %else14 +; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB7_25 +; NONEON-NOSVE-NEXT: .LBB7_9: // %else16 +; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB7_26 +; NONEON-NOSVE-NEXT: .LBB7_10: // %else18 +; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB7_27 +; NONEON-NOSVE-NEXT: .LBB7_11: // %else20 +; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB7_28 +; NONEON-NOSVE-NEXT: .LBB7_12: // %else22 +; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB7_29 +; NONEON-NOSVE-NEXT: .LBB7_13: // %else24 +; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB7_30 +; NONEON-NOSVE-NEXT: .LBB7_14: // %else26 +; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB7_31 +; NONEON-NOSVE-NEXT: .LBB7_15: // %else28 +; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB7_32 +; NONEON-NOSVE-NEXT: .LBB7_16: // %else30 +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB7_17: // %cond.store +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB7_2 +; NONEON-NOSVE-NEXT: .LBB7_18: // %cond.store1 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #2] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB7_3 +; NONEON-NOSVE-NEXT: .LBB7_19: // %cond.store3 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #4] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB7_4 +; NONEON-NOSVE-NEXT: .LBB7_20: // %cond.store5 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #6] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB7_5 +; NONEON-NOSVE-NEXT: .LBB7_21: // %cond.store7 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #8] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB7_6 +; NONEON-NOSVE-NEXT: .LBB7_22: // %cond.store9 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #10] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB7_7 +; NONEON-NOSVE-NEXT: .LBB7_23: // %cond.store11 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #12] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB7_8 +; NONEON-NOSVE-NEXT: .LBB7_24: // %cond.store13 
+; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #14] +; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB7_9 +; NONEON-NOSVE-NEXT: .LBB7_25: // %cond.store15 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #16] +; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB7_10 +; NONEON-NOSVE-NEXT: .LBB7_26: // %cond.store17 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #18] +; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB7_11 +; NONEON-NOSVE-NEXT: .LBB7_27: // %cond.store19 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #20] +; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB7_12 +; NONEON-NOSVE-NEXT: .LBB7_28: // %cond.store21 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #22] +; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB7_13 +; NONEON-NOSVE-NEXT: .LBB7_29: // %cond.store23 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #24] +; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB7_14 +; NONEON-NOSVE-NEXT: .LBB7_30: // %cond.store25 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #26] +; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB7_15 +; NONEON-NOSVE-NEXT: .LBB7_31: // %cond.store27 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #28] +; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB7_16 +; NONEON-NOSVE-NEXT: .LBB7_32: // %cond.store29 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #30] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v16f16(<16 x half> zeroinitializer, ptr %dst, i32 8, <16 x i1> %mask) ret void } @@ -225,6 +1180,47 @@ define void @masked_store_v4f32(ptr %dst, <4 x i1> %mask) { ; CHECK-NEXT: mov z0.s, #0 // =0x0 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w11, [sp] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: and w9, w9, #0x4 +; NONEON-NOSVE-NEXT: and w10, w10, #0x8 +; NONEON-NOSVE-NEXT: bfxil w8, w11, #0, #1 +; NONEON-NOSVE-NEXT: orr w9, w9, w10 +; NONEON-NOSVE-NEXT: orr w8, w8, w9 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB8_5 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB8_6 +; NONEON-NOSVE-NEXT: .LBB8_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB8_7 +; NONEON-NOSVE-NEXT: .LBB8_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB8_8 +; NONEON-NOSVE-NEXT: .LBB8_4: // %else6 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB8_5: // %cond.store +; NONEON-NOSVE-NEXT: str wzr, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB8_2 +; NONEON-NOSVE-NEXT: .LBB8_6: // %cond.store1 +; NONEON-NOSVE-NEXT: str wzr, [x0, #4] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB8_3 +; NONEON-NOSVE-NEXT: .LBB8_7: // %cond.store3 +; NONEON-NOSVE-NEXT: str wzr, [x0, #8] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB8_4 +; NONEON-NOSVE-NEXT: .LBB8_8: // %cond.store5 +; NONEON-NOSVE-NEXT: str wzr, [x0, #12] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v4f32(<4 x float> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask) ret void } @@ -275,6 +1271,84 @@ define void @masked_store_v8f32(ptr %dst, <8 x i1> %mask) { ; CHECK-NEXT: st1w { z1.s }, p0, [x0] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: ldrb w10, [sp, #1] +; NONEON-NOSVE-NEXT: ldrb w12, [sp, #4] +; NONEON-NOSVE-NEXT: ldrb w13, [sp, #5] +; NONEON-NOSVE-NEXT: ldrb w14, [sp, #6] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: ldrb w11, [sp] +; NONEON-NOSVE-NEXT: sbfx w12, w12, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w13, w13, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x4 +; NONEON-NOSVE-NEXT: and w9, w9, #0x8 +; NONEON-NOSVE-NEXT: sbfx w14, w14, #0, #1 +; NONEON-NOSVE-NEXT: orr w8, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: and w10, w10, #0x2 +; NONEON-NOSVE-NEXT: and w12, w12, #0x10 +; NONEON-NOSVE-NEXT: bfxil w10, w11, #0, #1 +; NONEON-NOSVE-NEXT: and w11, w13, #0x20 +; NONEON-NOSVE-NEXT: orr w8, w8, w12 +; NONEON-NOSVE-NEXT: and w12, w14, #0x40 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: orr w8, w10, w8 +; NONEON-NOSVE-NEXT: orr w10, w11, w12 +; NONEON-NOSVE-NEXT: orr w8, w8, w10 +; NONEON-NOSVE-NEXT: and w9, w9, #0x80 +; NONEON-NOSVE-NEXT: add w9, w8, w9 +; NONEON-NOSVE-NEXT: and w8, w9, #0xff +; NONEON-NOSVE-NEXT: tbnz w9, #0, .LBB9_9 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB9_10 +; NONEON-NOSVE-NEXT: .LBB9_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB9_11 +; NONEON-NOSVE-NEXT: .LBB9_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB9_12 +; NONEON-NOSVE-NEXT: .LBB9_4: // %else6 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB9_13 +; NONEON-NOSVE-NEXT: .LBB9_5: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB9_14 +; NONEON-NOSVE-NEXT: .LBB9_6: // %else10 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB9_15 +; NONEON-NOSVE-NEXT: .LBB9_7: // %else12 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB9_16 +; NONEON-NOSVE-NEXT: .LBB9_8: // %else14 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; 
NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB9_9: // %cond.store +; NONEON-NOSVE-NEXT: str wzr, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB9_2 +; NONEON-NOSVE-NEXT: .LBB9_10: // %cond.store1 +; NONEON-NOSVE-NEXT: str wzr, [x0, #4] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB9_3 +; NONEON-NOSVE-NEXT: .LBB9_11: // %cond.store3 +; NONEON-NOSVE-NEXT: str wzr, [x0, #8] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB9_4 +; NONEON-NOSVE-NEXT: .LBB9_12: // %cond.store5 +; NONEON-NOSVE-NEXT: str wzr, [x0, #12] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB9_5 +; NONEON-NOSVE-NEXT: .LBB9_13: // %cond.store7 +; NONEON-NOSVE-NEXT: str wzr, [x0, #16] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB9_6 +; NONEON-NOSVE-NEXT: .LBB9_14: // %cond.store9 +; NONEON-NOSVE-NEXT: str wzr, [x0, #20] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB9_7 +; NONEON-NOSVE-NEXT: .LBB9_15: // %cond.store11 +; NONEON-NOSVE-NEXT: str wzr, [x0, #24] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB9_8 +; NONEON-NOSVE-NEXT: .LBB9_16: // %cond.store13 +; NONEON-NOSVE-NEXT: str wzr, [x0, #28] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v8f32(<8 x float> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask) ret void } @@ -291,6 +1365,29 @@ define void @masked_store_v2f64(ptr %dst, <2 x i1> %mask) { ; CHECK-NEXT: mov z0.d, #0 // =0x0 ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: bfxil w8, w9, #0, #1 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB10_3 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB10_4 +; NONEON-NOSVE-NEXT: .LBB10_2: // %else2 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB10_3: // %cond.store +; NONEON-NOSVE-NEXT: str xzr, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB10_2 +; NONEON-NOSVE-NEXT: .LBB10_4: // %cond.store1 +; NONEON-NOSVE-NEXT: str xzr, [x0, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v2f64(<2 x double> zeroinitializer, ptr %dst, i32 8, <2 x i1> %mask) ret void } @@ -315,6 +1412,47 @@ define void @masked_store_v4f64(ptr %dst, <4 x i1> %mask) { ; CHECK-NEXT: st1d { z0.d }, p1, [x0, x8, lsl #3] ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w11, [sp] +; NONEON-NOSVE-NEXT: sbfx w8, w8, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #0, #1 +; NONEON-NOSVE-NEXT: sbfx w10, w10, #0, #1 +; NONEON-NOSVE-NEXT: and w8, w8, #0x2 +; NONEON-NOSVE-NEXT: and w9, w9, #0x4 +; NONEON-NOSVE-NEXT: and w10, w10, #0x8 +; NONEON-NOSVE-NEXT: bfxil w8, w11, #0, #1 +; NONEON-NOSVE-NEXT: orr w9, w9, w10 +; NONEON-NOSVE-NEXT: orr w8, w8, w9 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB11_5 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB11_6 +; NONEON-NOSVE-NEXT: .LBB11_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB11_7 +; NONEON-NOSVE-NEXT: .LBB11_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB11_8 +; NONEON-NOSVE-NEXT: .LBB11_4: // %else6 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB11_5: // %cond.store +; NONEON-NOSVE-NEXT: str xzr, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB11_2 +; NONEON-NOSVE-NEXT: .LBB11_6: // %cond.store1 +; NONEON-NOSVE-NEXT: str xzr, [x0, #8] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB11_3 +; NONEON-NOSVE-NEXT: .LBB11_7: // %cond.store3 +; NONEON-NOSVE-NEXT: str xzr, [x0, #16] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB11_4 +; NONEON-NOSVE-NEXT: .LBB11_8: // %cond.store5 +; NONEON-NOSVE-NEXT: str xzr, [x0, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v4f64(<4 x double> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask) ret void } diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll index aef446a90df65..dbdf5f2502999 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll 
@@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -14,6 +15,26 @@ define void @add_v4i8(ptr %a, ptr %b) { ; CHECK-NEXT: add z0.h, z0.h, z1.h ; CHECK-NEXT: st1b { z0.h }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldrb w8, [x0, #3] +; NONEON-NOSVE-NEXT: ldrb w9, [x1, #3] +; NONEON-NOSVE-NEXT: ldrb w10, [x0, #2] +; NONEON-NOSVE-NEXT: ldrb w11, [x0, #1] +; NONEON-NOSVE-NEXT: ldrb w12, [x1, #2] +; NONEON-NOSVE-NEXT: ldrb w13, [x0] +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: ldrb w14, [x1, #1] +; NONEON-NOSVE-NEXT: ldrb w9, [x1] +; NONEON-NOSVE-NEXT: add w10, w10, w12 +; NONEON-NOSVE-NEXT: strb w8, [x0, #3] +; NONEON-NOSVE-NEXT: add w8, w11, w14 +; NONEON-NOSVE-NEXT: add w9, w13, w9 +; NONEON-NOSVE-NEXT: strb w10, [x0, #2] +; NONEON-NOSVE-NEXT: strb w8, [x0, #1] +; NONEON-NOSVE-NEXT: strb w9, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i8>, ptr %a %op2 = load <4 x i8>, ptr %b %res = add <4 x i8> %op1, %op2 @@ -29,6 +50,50 @@ define void @add_v8i8(ptr %a, ptr %b) { ; CHECK-NEXT: add z0.b, z0.b, z1.b ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [x1] +; NONEON-NOSVE-NEXT: ldr d1, [x0] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: add w8, w9, 
w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i8>, ptr %a %op2 = load <8 x i8>, ptr %b %res = add <8 x i8> %op1, %op2 @@ -44,6 +109,81 @@ define void @add_v16i8(ptr %a, ptr %b) { ; CHECK-NEXT: add z0.b, z0.b, z1.b ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] 
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i8>, ptr %a %op2 = load <16 x i8>, ptr %b %res = add <16 x i8> %op1, %op2 @@ -60,6 +200,147 @@ define void @add_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: add z1.b, z2.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #47] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #62] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #60] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #59] 
+; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #58] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #56] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #54] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #52] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #50] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #48] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] 
+; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #71] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; 
NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = add <32 x i8> %op1, %op2 @@ -76,6 +357,18 @@ define void @add_v2i16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: add z0.s, z0.s, z1.s ; CHECK-NEXT: st1h { z0.s }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldrh w8, [x0] +; NONEON-NOSVE-NEXT: ldrh w9, [x1] +; NONEON-NOSVE-NEXT: ldrh w10, [x0, #2] +; NONEON-NOSVE-NEXT: ldrh w11, [x1, #2] +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: add w9, w10, w11 +; NONEON-NOSVE-NEXT: strh w8, [x0] +; NONEON-NOSVE-NEXT: strh w9, [x0, #2] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x i16>, ptr %a %op2 = load <2 x i16>, ptr %b %res = add <2 x i16> %op1, %op2 @@ -91,6 +384,34 @@ define void @add_v4i16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: add z0.h, z0.h, z1.h ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [x1] +; NONEON-NOSVE-NEXT: ldr d1, [x0] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, 
#10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i16>, ptr %a %op2 = load <4 x i16>, ptr %b %res = add <4 x i16> %op1, %op2 @@ -106,6 +427,49 @@ define void @add_v8i16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: add z0.h, z0.h, z1.h ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: add w8, 
w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %op2 = load <8 x i16>, ptr %b %res = add <8 x i16> %op1, %op2 @@ -122,6 +486,83 @@ define void @add_v16i16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: add z1.h, z2.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #46] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: 
ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = add <16 x i16> %op1, %op2 @@ -137,6 +578,23 @@ define void @abs_v2i32(ptr %a) { ; CHECK-NEXT: abs z0.s, p0/m, z0.s ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w9, w8, mi +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x i32>, ptr %a %res = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %op1, i1 false) store <2 x i32> %res, ptr %a @@ -151,6 +609,30 @@ define void @abs_v4i32(ptr %a) { ; CHECK-NEXT: abs z0.s, p0/m, z0.s ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w9, w8, mi +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w9, w8, mi +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i32>, ptr %a %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %op1, i1 false) store <4 x i32> %res, ptr %a @@ -166,6 +648,44 @@ define void @abs_v8i32(ptr %a) { ; CHECK-NEXT: abs z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w9, w8, mi +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w9, w8, mi +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w9, w8, mi +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w9, w8, mi +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: cmp w8, #0 +; NONEON-NOSVE-NEXT: cneg w8, w8, mi +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %op1, i1 false) store <8 x i32> %res, ptr %a @@ -180,6 +700,23 @@ define void @abs_v2i64(ptr %a) { ; CHECK-NEXT: abs z0.d, p0/m, z0.d ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp x8, #0 +; NONEON-NOSVE-NEXT: cneg x9, x8, mi +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: cmp x8, #0 +; NONEON-NOSVE-NEXT: cneg x8, x8, mi +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x i64>, ptr %a %res = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %op1, i1 false) store <2 x i64> %res, ptr %a @@ -195,6 +732,30 @@ define void @abs_v4i64(ptr %a) { ; CHECK-NEXT: abs z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: cmp x8, #0 +; NONEON-NOSVE-NEXT: cneg x9, x8, mi +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: cmp x8, #0 +; NONEON-NOSVE-NEXT: cneg x8, x8, mi +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: cmp x8, #0 +; NONEON-NOSVE-NEXT: cneg x9, x8, mi +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: cmp x8, #0 +; NONEON-NOSVE-NEXT: cneg x8, x8, mi +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %res = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %op1, i1 false) store <4 x i64> %res, ptr %a @@ -211,6 +772,36 @@ define void @fadd_v2f16(ptr %a, ptr %b) { ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: str w8, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr w8, [x1] +; NONEON-NOSVE-NEXT: str w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #32] +; NONEON-NOSVE-NEXT: str d0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] +; NONEON-NOSVE-NEXT: str w8, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x half>, ptr %a %op2 = load <2 x half>, ptr %b %res = fadd <2 x half> %op1, %op2 @@ -227,6 +818,46 @@ define void @fadd_v4f16(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [x1] +; NONEON-NOSVE-NEXT: ldr d1, [x0] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, 
s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #26] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x half>, ptr %a %op2 = load <4 x half>, ptr %b %res = fadd <4 x half> %op1, %op2 @@ -243,6 +874,73 @@ define void @fadd_v8f16(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #42] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #40] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #38] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #36] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #34] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt 
s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %op2 = load <8 x half>, ptr %b %res = fadd <8 x half> %op1, %op2 @@ -261,6 +959,131 @@ define void @fadd_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ldr h1, [sp, #46] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #44] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #94] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #42] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #92] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #40] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #90] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #38] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #88] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] +; NONEON-NOSVE-NEXT: 
fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #36] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #86] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #34] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #84] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #32] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #82] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #14] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #80] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #12] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #78] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #10] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #76] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #74] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #6] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: 
str h0, [sp, #72] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #4] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #70] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp, #2] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #68] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldr h1, [sp] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #66] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: str h0, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %res = fadd <16 x half> %op1, %op2 @@ -277,6 +1100,24 @@ define void @fadd_v2f32(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d0, [x1] +; NONEON-NOSVE-NEXT: ldr d1, [x0] +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fadd s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; 
NONEON-NOSVE-NEXT: ret %op1 = load <2 x float>, ptr %a %op2 = load <2 x float>, ptr %b %res = fadd <2 x float> %op1, %op2 @@ -293,6 +1134,29 @@ define void @fadd_v4f32(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fadd s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fadd s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x float>, ptr %a %op2 = load <4 x float>, ptr %b %res = fadd <4 x float> %op1, %op2 @@ -311,6 +1175,43 @@ define void @fadd_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #60] +; NONEON-NOSVE-NEXT: fadd s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #56] +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #32] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #88] +; NONEON-NOSVE-NEXT: ldr 
s0, [sp, #52] +; NONEON-NOSVE-NEXT: fadd s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #48] +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #80] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] +; NONEON-NOSVE-NEXT: fadd s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s2, [sp] +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #72] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] +; NONEON-NOSVE-NEXT: fadd s3, s2, s0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = fadd <8 x float> %op1, %op2 @@ -327,6 +1228,23 @@ define void @fadd_v2f64(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fadd d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fadd d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x double>, ptr %a %op2 = load <2 x double>, ptr %b %res = fadd <2 x double> %op1, %op2 @@ -345,6 +1263,31 @@ define void @fadd_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #56] +; NONEON-NOSVE-NEXT: fadd d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #48] +; NONEON-NOSVE-NEXT: fadd d0, d1, d0 +; NONEON-NOSVE-NEXT: ldp d1, d2, [sp] +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #80] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: fadd d3, d2, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #16] +; NONEON-NOSVE-NEXT: fadd d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %res = fadd <4 x double> %op1, %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll index 6d91253caae58..8c23f5f9922da 100644 --- 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -15,6 +16,74 @@ define void @test_revbv16i16(ptr %a) { ; CHECK-NEXT: revb z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revbv16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: str q1, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; 
NONEON-NOSVE-NEXT: ldrb w8, [sp, #47] +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <32 x i8>, ptr %a %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> store <32 x i8> %tmp2, ptr %a @@ -31,6 +100,74 @@ define void @test_revbv8i32(ptr %a) { ; CHECK-NEXT: revb z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revbv8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: str q1, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #47] +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] 
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <32 x i8>, ptr %a %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> store <32 x i8> %tmp2, ptr %a @@ -47,6 +184,74 @@ define void @test_revbv4i64(ptr %a) { ; CHECK-NEXT: revb z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revbv4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: str q1, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #47] +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <32 x i8>, ptr %a %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> store <32 x i8> %tmp2, ptr %a @@ -63,6 +268,38 @@ define void @test_revhv8i32(ptr %a) { ; CHECK-NEXT: revh z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revhv8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: str q1, [sp, #32] +; NONEON-NOSVE-NEXT: ror w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: ror w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: ror w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] +; NONEON-NOSVE-NEXT: ror w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <16 x i16>, ptr %a %tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> store <16 x i16> %tmp2, ptr %a @@ -79,6 +316,38 @@ define void @test_revhv8f32(ptr %a) { ; CHECK-NEXT: revh z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revhv8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: str q1, [sp, #32] +; NONEON-NOSVE-NEXT: ror w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: ror w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: ror w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] +; NONEON-NOSVE-NEXT: ror w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <16 x half>, ptr %a %tmp2 = shufflevector <16 x half> %tmp1, <16 x half> undef, <16 x i32> store <16 x half> %tmp2, ptr %a @@ -95,6 +364,38 @@ define void @test_revhv4i64(ptr %a) { ; CHECK-NEXT: revh z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revhv4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: str q1, [sp, #32] +; NONEON-NOSVE-NEXT: ror w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: ror w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: ror w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] +; NONEON-NOSVE-NEXT: ror w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <16 x i16>, ptr %a %tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> store <16 x i16> %tmp2, ptr %a @@ -111,6 +412,26 @@ define void @test_revwv4i64(ptr %a) { ; CHECK-NEXT: revw z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revwv4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: str q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x i32>, ptr %a %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> store <8 x i32> %tmp2, ptr %a @@ -127,6 +448,26 @@ define void @test_revwv4f64(ptr %a) { ; CHECK-NEXT: revw z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revwv4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: str q1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #8] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #40] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #32] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x float>, ptr %a %tmp2 = shufflevector <8 x float> %tmp1, <8 x float> undef, <8 x i32> store <8 x float> %tmp2, ptr %a @@ -141,6 +482,47 @@ define <16 x i8> @test_revv16i8(ptr %a) { ; CHECK-NEXT: revb z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revv16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <16 x i8>, ptr %a %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> ret <16 x i8> %tmp2 @@ -156,6 +538,26 @@ define void @test_revwv8i32v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: revw z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revwv8i32v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x1] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: str q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x i32>, ptr %a %tmp2 = load <8 x i32>, ptr %b %tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> @@ -176,6 +578,62 @@ define void @test_revhv32i16(ptr %a) { ; CHECK-NEXT: stp q0, q1, [x0, #32] ; CHECK-NEXT: stp q2, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revhv32i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #128 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0, #32] +; NONEON-NOSVE-NEXT: str q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] +; NONEON-NOSVE-NEXT: str q1, [sp, #96] +; NONEON-NOSVE-NEXT: str q3, [sp, #64] +; NONEON-NOSVE-NEXT: ror w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] +; NONEON-NOSVE-NEXT: str q2, [sp] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] +; NONEON-NOSVE-NEXT: ror w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] +; NONEON-NOSVE-NEXT: ldr q2, [sp, #48] +; NONEON-NOSVE-NEXT: ror w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] +; NONEON-NOSVE-NEXT: ror w9, w8, 
#16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #100] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldr q3, [sp, #112] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #70] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldur w8, [sp, #74] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: stur w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #80] +; NONEON-NOSVE-NEXT: ror w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: ror w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: stp q3, q2, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <32 x i16>, ptr %a %tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <32 x i32> store <32 x i16> %tmp2, ptr %a @@ -191,6 +649,22 @@ define void @test_rev_elts_fail(ptr %a) { ; CHECK-NEXT: tbl z0.d, { z2.d }, z0.d ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_rev_elts_fail: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp] +; NONEON-NOSVE-NEXT: str q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x i64>, ptr %a %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> undef, <4 x i32> store <4 x i64> %tmp2, ptr %a @@ -208,6 +682,15 @@ define void @test_revdv4i64_sve2p1(ptr %a) #1 { ; CHECK-NEXT: revd z1.q, p0/m, z1.q ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revdv4i64_sve2p1: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ptrue p0.d, vl2 +; NONEON-NOSVE-NEXT: revd z0.q, p0/m, z0.q +; NONEON-NOSVE-NEXT: revd z1.q, p0/m, z1.q +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x i64>, ptr %a %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> undef, <4 x i32> store <4 x i64> %tmp2, ptr %a @@ -223,6 +706,15 @@ define void @test_revdv4f64_sve2p1(ptr %a) #1 { ; CHECK-NEXT: revd z1.q, p0/m, z1.q ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revdv4f64_sve2p1: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ptrue p0.d +; NONEON-NOSVE-NEXT: revd z0.q, p0/m, z0.q +; NONEON-NOSVE-NEXT: revd z1.q, p0/m, z1.q +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x double>, ptr %a %tmp2 = shufflevector <4 x double> %tmp1, <4 x double> undef, <4 x i32> store <4 x double> %tmp2, ptr %a @@ -238,6 +730,27 @@ define void @test_revv8i32(ptr %a) { ; CHECK-NEXT: tbl z0.s, { z2.s }, z0.s ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revv8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #32] +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #40] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x i32>, ptr %a %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> store <8 x i32> %tmp2, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll index 8808ad9a23d7c..bc6fdd1ecd5a7 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -68,6 +69,86 @@ define void @zip1_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: str q1, [x0, #16] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip1_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; 
NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: strb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: strb w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: strb w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: strb w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: strb w8, [sp, #6] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: strb w8, [sp, #4] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: strb w8, [sp, #2] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #15] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #9] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #7] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #5] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #3] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #1] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldr q1, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; 
NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #47] +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: str q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %tmp1 = load volatile <32 x i8>, ptr %a %tmp2 = load volatile <32 x i8>, ptr %b %tmp3 = shufflevector <32 x i8> %tmp1, <32 x i8> %tmp2, <32 x i32> @@ -196,6 +277,153 @@ define void @zip_v32i16(ptr %a, ptr %b) { ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip_v32i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #192 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 192 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x1, #32] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x1] +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #32] +; NONEON-NOSVE-NEXT: stp q3, q5, [sp] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #30] +; NONEON-NOSVE-NEXT: stp q6, q2, [sp, #32] +; NONEON-NOSVE-NEXT: stp q7, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q4, q1, [sp, #96] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #126] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #190] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #78] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh 
w9, [sp, #28] +; NONEON-NOSVE-NEXT: strh w8, [sp, #188] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #124] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #186] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #76] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #26] +; NONEON-NOSVE-NEXT: strh w8, [sp, #184] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #122] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #182] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #74] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #24] +; NONEON-NOSVE-NEXT: strh w8, [sp, #180] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #120] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #178] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #72] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #22] +; NONEON-NOSVE-NEXT: strh w8, [sp, #176] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #118] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #174] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #70] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #20] +; NONEON-NOSVE-NEXT: strh w8, [sp, #172] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #116] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #170] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #68] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #18] +; NONEON-NOSVE-NEXT: strh w8, [sp, #168] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #114] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: strh w8, [sp, #166] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #66] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #164] +; 
NONEON-NOSVE-NEXT: ldrh w8, [sp, #112] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #162] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #64] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #110] +; NONEON-NOSVE-NEXT: strh w8, [sp, #160] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #160] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #62] +; NONEON-NOSVE-NEXT: strh w8, [sp, #158] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #46] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #108] +; NONEON-NOSVE-NEXT: strh w8, [sp, #156] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #92] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #60] +; NONEON-NOSVE-NEXT: strh w8, [sp, #154] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #44] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #106] +; NONEON-NOSVE-NEXT: strh w8, [sp, #152] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #90] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #58] +; NONEON-NOSVE-NEXT: strh w8, [sp, #150] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #42] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #104] +; NONEON-NOSVE-NEXT: strh w8, [sp, #148] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #88] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #56] +; NONEON-NOSVE-NEXT: strh w8, [sp, #146] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #40] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #102] +; NONEON-NOSVE-NEXT: strh w8, [sp, #144] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #86] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #54] +; NONEON-NOSVE-NEXT: strh w8, [sp, #142] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #38] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #100] +; NONEON-NOSVE-NEXT: strh w8, [sp, #140] +; NONEON-NOSVE-NEXT: 
ldrh w8, [sp, #84] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #52] +; NONEON-NOSVE-NEXT: strh w8, [sp, #138] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #36] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #98] +; NONEON-NOSVE-NEXT: strh w8, [sp, #136] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #82] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #50] +; NONEON-NOSVE-NEXT: strh w8, [sp, #134] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #34] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #96] +; NONEON-NOSVE-NEXT: strh w8, [sp, #132] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #80] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #48] +; NONEON-NOSVE-NEXT: strh w8, [sp, #130] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #32] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128] +; NONEON-NOSVE-NEXT: stp q3, q2, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #192 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <32 x i16>, ptr %a %tmp2 = load <32 x i16>, ptr %b %tmp3 = shufflevector <32 x i16> %tmp1, <32 x i16> %tmp2, <32 x i32> @@ -244,6 +472,54 @@ define void @zip1_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: str q1, [x0, #16] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip1_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: strh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: strh w8, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, 
#16] +; NONEON-NOSVE-NEXT: strh w8, [sp] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldr q1, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #46] +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: str q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %tmp1 = load volatile <16 x i16>, ptr %a %tmp2 = load volatile <16 x i16>, ptr %b %tmp3 = shufflevector <16 x i16> %tmp1, <16 x i16> %tmp2, <16 x i32> @@ -276,6 +552,30 @@ define void @zip1_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: str q1, [x0, #16] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip1_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w8, w11, [sp, #32] +; NONEON-NOSVE-NEXT: stp w9, w11, 
[sp, #8] +; NONEON-NOSVE-NEXT: stp w10, w8, [sp] +; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldp w8, w11, [sp, #40] +; NONEON-NOSVE-NEXT: ldr q1, [sp] +; NONEON-NOSVE-NEXT: stp w9, w11, [sp, #56] +; NONEON-NOSVE-NEXT: stp w10, w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: str q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %tmp1 = load volatile <8 x i32>, ptr %a %tmp2 = load volatile <8 x i32>, ptr %b %tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> @@ -298,6 +598,32 @@ define void @zip_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp] +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #56] +; NONEON-NOSVE-NEXT: fadd d2, d1, d0 +; NONEON-NOSVE-NEXT: ldp d3, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: fadd d0, d3, d0 +; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #64] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #48] +; NONEON-NOSVE-NEXT: fadd d2, d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d1, [sp] +; NONEON-NOSVE-NEXT: fadd d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x double>, ptr %a %tmp2 = load <4 x double>, ptr %b %tmp3 = shufflevector <4 x double> %tmp1, <4 x double> %tmp2, <4 x i32> @@ -330,6 +656,33 @@ define void @zip_v4i32(ptr %a, ptr %b) { ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; 
NONEON-NOSVE-LABEL: zip_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #4] +; NONEON-NOSVE-NEXT: str w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldr w9, [sp] +; NONEON-NOSVE-NEXT: str w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x i32>, ptr %a %tmp2 = load <4 x i32>, ptr %b %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> @@ -351,6 +704,26 @@ define void @zip1_v8i32_undef(ptr %a) { ; CHECK-NEXT: str q1, [x0, #16] ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip1_v8i32_undef: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: stp w8, w8, [sp] +; NONEON-NOSVE-NEXT: ldp w8, w10, [sp, #24] +; NONEON-NOSVE-NEXT: stp w9, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldr q1, [sp] +; NONEON-NOSVE-NEXT: stp w10, w10, [sp, #40] +; NONEON-NOSVE-NEXT: stp w8, w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %tmp1 = 
load volatile <8 x i32>, ptr %a %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> store volatile <8 x i32> %tmp2, ptr %a @@ -370,6 +743,135 @@ define void @trn_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: add z1.b, z1.b, z2.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trn_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #62] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #47] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #60] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #45] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #58] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #59] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #43] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #56] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #54] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; 
NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #39] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #52] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #37] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #50] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #35] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #48] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #33] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #30] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #28] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #26] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] 
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #24] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #20] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #18] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #67] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <32 x i8>, ptr %a %tmp2 = load <32 x i8>, ptr %b %tmp3 = shufflevector <32 x i8> %tmp1, <32 x i8> %tmp2, <32 x i32> @@ -392,6 +894,36 @@ define void @trn_v8i16(ptr %a, ptr %b) { ; CHECK-NEXT: add z0.h, z1.h, z0.h ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trn_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #4] +; NONEON-NOSVE-NEXT: add w10, w9, w8 +; NONEON-NOSVE-NEXT: strh w10, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #8] +; NONEON-NOSVE-NEXT: add w10, w11, w10 +; NONEON-NOSVE-NEXT: strh w10, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #12] +; NONEON-NOSVE-NEXT: add w11, w10, w11 +; NONEON-NOSVE-NEXT: add w8, w8, w10 +; NONEON-NOSVE-NEXT: strh w11, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: add w11, w12, w11 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: strh w11, [sp, #20] +; NONEON-NOSVE-NEXT: strh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x i16>, ptr %a %tmp2 = load <8 x i16>, ptr %b %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> @@ -414,6 +946,83 @@ define void @trn_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: add z1.h, z1.h, z2.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trn_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #60] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #46] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #56] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] +; NONEON-NOSVE-NEXT: 
add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #42] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #52] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #38] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #48] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #34] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #28] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #24] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #20] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, 
[sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <16 x i16>, ptr %a %tmp2 = load <16 x i16>, ptr %b %tmp3 = shufflevector <16 x i16> %tmp1, <16 x i16> %tmp2, <16 x i32> @@ -436,6 +1045,29 @@ define void @trn_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: add z1.s, z1.s, z2.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trn_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: ldr q0, [x1, #16] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #48] +; NONEON-NOSVE-NEXT: str q2, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #32] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #64] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp] +; NONEON-NOSVE-NEXT: ldr q0, [sp] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x i32>, ptr %a %tmp2 = load <8 x i32>, ptr %b %tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> @@ -459,6 +1091,29 @@ define void @trn_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z2.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trn_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q3, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: stp q2, q3, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; 
NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #48] +; NONEON-NOSVE-NEXT: fadd d2, d1, d0 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #32] +; NONEON-NOSVE-NEXT: fadd d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #80] +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: fadd d2, d1, d0 +; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] +; NONEON-NOSVE-NEXT: fadd d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x double>, ptr %a %tmp2 = load <4 x double>, ptr %b %tmp3 = shufflevector <4 x double> %tmp1, <4 x double> %tmp2, <4 x i32> @@ -479,6 +1134,27 @@ define void @trn_v4f32(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z2.s ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trn_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #24] +; NONEON-NOSVE-NEXT: fadd s2, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #8] +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #40] +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #16] +; NONEON-NOSVE-NEXT: fadd s2, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp] +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x float>, ptr %a %tmp2 = load <4 x float>, ptr %b %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> @@ -500,6 +1176,28 @@ define void @trn_v8i32_undef(ptr %a) { ; CHECK-NEXT: add z1.s, z3.s, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trn_v8i32_undef: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #24] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x i32>, ptr %a %tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> %tmp4 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> @@ -571,6 +1269,86 @@ define void @zip2_v32i8(ptr %a, ptr %b) #0{ ; CHECK-NEXT: str q1, [x0, #16] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip2_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: strb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: strb w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: strb w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: strb w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: strb w8, [sp, #6] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: strb w8, [sp, #4] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: strb w8, [sp, #2] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp] +; 
NONEON-NOSVE-NEXT: ldrb w8, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #15] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #9] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #7] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #5] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #3] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #1] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldr q1, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #47] +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; 
NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: str q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %tmp1 = load volatile <32 x i8>, ptr %a %tmp2 = load volatile <32 x i8>, ptr %b %tmp3 = shufflevector <32 x i8> %tmp1, <32 x i8> %tmp2, <32 x i32> @@ -617,6 +1395,54 @@ define void @zip2_v16i16(ptr %a, ptr %b) #0{ ; CHECK-NEXT: str q1, [x0, #16] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip2_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: strh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: strh w8, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldr q1, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #46] +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; 
NONEON-NOSVE-NEXT: ldrh w8, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: str q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %tmp1 = load volatile <16 x i16>, ptr %a %tmp2 = load volatile <16 x i16>, ptr %b %tmp3 = shufflevector <16 x i16> %tmp1, <16 x i16> %tmp2, <16 x i32> @@ -649,6 +1475,30 @@ define void @zip2_v8i32(ptr %a, ptr %b) #0{ ; CHECK-NEXT: str q1, [x0, #16] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip2_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w8, w11, [sp, #32] +; NONEON-NOSVE-NEXT: stp w9, w11, [sp, #8] +; NONEON-NOSVE-NEXT: stp w10, w8, [sp] +; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldp w8, w11, [sp, #40] +; NONEON-NOSVE-NEXT: ldr q1, [sp] +; NONEON-NOSVE-NEXT: stp w9, w11, [sp, #56] +; NONEON-NOSVE-NEXT: stp w10, w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: str q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %tmp1 = load volatile <8 x i32>, ptr %a %tmp2 = load volatile <8 x i32>, ptr %b %tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> @@ -668,6 +1518,26 @@ define void @zip2_v8i32_undef(ptr %a) #0{ ; CHECK-NEXT: str q1, [x0, #16] ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip2_v8i32_undef: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #48 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; 
NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: stp w8, w8, [sp] +; NONEON-NOSVE-NEXT: ldp w8, w10, [sp, #24] +; NONEON-NOSVE-NEXT: stp w9, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldr q1, [sp] +; NONEON-NOSVE-NEXT: stp w10, w10, [sp, #40] +; NONEON-NOSVE-NEXT: stp w8, w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %tmp1 = load volatile <8 x i32>, ptr %a %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> store volatile <8 x i32> %tmp2, ptr %a @@ -869,6 +1739,135 @@ define void @uzp_v32i8(ptr %a, ptr %b) #0{ ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uzp_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp] +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #62] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #60] +; NONEON-NOSVE-NEXT: strb w8, [sp, #95] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #58] +; NONEON-NOSVE-NEXT: strb w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #59] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #56] +; NONEON-NOSVE-NEXT: strb w8, [sp, #93] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #54] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #52] +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #91] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #50] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #48] +; NONEON-NOSVE-NEXT: strb w8, [sp, #89] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #30] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #28] +; NONEON-NOSVE-NEXT: strb w8, [sp, #87] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #26] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #24] +; NONEON-NOSVE-NEXT: strb w8, [sp, #85] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #22] +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #20] +; NONEON-NOSVE-NEXT: strb w8, [sp, #83] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #18] +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, #81] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #47] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #45] 
+; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #43] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #39] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #75] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #37] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #35] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #69] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrb w9, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <32 x i8>, 
ptr %a %tmp2 = load <32 x i8>, ptr %b %tmp3 = shufflevector <32 x i8> %tmp1, <32 x i8> %tmp2, <32 x i32> @@ -891,6 +1890,26 @@ define void @uzp_v4i16(ptr %a, ptr %b) #0{ ; CHECK-NEXT: add z0.h, z1.h, z0.h ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uzp_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: add w9, w9, w8 +; NONEON-NOSVE-NEXT: strh w9, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x i16>, ptr %a %tmp2 = load <4 x i16>, ptr %b %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> @@ -1008,6 +2027,83 @@ define void @uzp_v16i16(ptr %a, ptr %b) #0{ ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uzp_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp] +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #60] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #56] +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #52] +; NONEON-NOSVE-NEXT: strh w8, [sp, #92] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; 
NONEON-NOSVE-NEXT: ldrh w9, [sp, #48] +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #28] +; NONEON-NOSVE-NEXT: strh w8, [sp, #88] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #24] +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #20] +; NONEON-NOSVE-NEXT: strh w8, [sp, #84] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #44] +; NONEON-NOSVE-NEXT: strh w8, [sp, #80] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #46] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #42] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #36] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #38] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #34] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; 
NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <16 x i16>, ptr %a %tmp2 = load <16 x i16>, ptr %b %tmp3 = shufflevector <16 x i16> %tmp1, <16 x i16> %tmp2, <16 x i32> @@ -1047,6 +2143,35 @@ define void @uzp_v8f32(ptr %a, ptr %b) #0{ ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uzp_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: mov x8, #9205357640488583168 // =0x7fc000007fc00000 +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: str x8, [sp, #56] +; NONEON-NOSVE-NEXT: mov w8, #2143289344 // =0x7fc00000 +; NONEON-NOSVE-NEXT: str w8, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #24] +; NONEON-NOSVE-NEXT: str q2, [sp] +; NONEON-NOSVE-NEXT: str w8, [sp, #68] +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: str s0, [sp, #52] +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #40] +; NONEON-NOSVE-NEXT: fadd s2, s1, s0 +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #32] +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #72] +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp] +; NONEON-NOSVE-NEXT: fadd s0, s1, s0 +; NONEON-NOSVE-NEXT: str s0, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x float>, ptr %a %tmp2 = load <8 x float>, ptr %b %tmp3 = shufflevector <8 x float> %tmp1, <8 x float> %tmp2, <8 x i32> @@ -1069,6 +2194,31 @@ define void @uzp_v4i64(ptr %a, ptr %b) #0{ ; CHECK-NEXT: add z1.d, z1.d, z2.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
uzp_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp] +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp, #48] +; NONEON-NOSVE-NEXT: add x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #88] +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp, #16] +; NONEON-NOSVE-NEXT: add x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #80] +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp, #32] +; NONEON-NOSVE-NEXT: add x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #72] +; NONEON-NOSVE-NEXT: ldp x9, x8, [sp] +; NONEON-NOSVE-NEXT: add x8, x9, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #64] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x i64>, ptr %a %tmp2 = load <4 x i64>, ptr %b %tmp3 = shufflevector <4 x i64> %tmp1, <4 x i64> %tmp2, <4 x i32> @@ -1136,6 +2286,49 @@ define void @uzp_v8i16(ptr %a, ptr %b) #0{ ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uzp_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #28] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #24] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #20] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x i16>, ptr %a %tmp2 = load <8 x i16>, ptr %b %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> @@ -1174,6 +2367,28 @@ define void @uzp_v8i32_undef(ptr %a) #0{ ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uzp_v8i32_undef: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #24] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp] +; NONEON-NOSVE-NEXT: add w8, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x i32>, ptr %a %tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> %tmp4 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> @@ -1197,6 +2412,32 @@ define void @zip_vscale2_4(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip_vscale2_4: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp] +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d1, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #56] +; NONEON-NOSVE-NEXT: fadd d2, d1, d0 +; NONEON-NOSVE-NEXT: ldp d3, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: fadd d0, d3, d0 +; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #64] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #48] +; NONEON-NOSVE-NEXT: fadd d2, d1, d0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d1, [sp] +; NONEON-NOSVE-NEXT: fadd d0, d1, d0 +; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x double>, ptr %a 
%tmp2 = load <4 x double>, ptr %b %tmp3 = shufflevector <4 x double> %tmp1, <4 x double> %tmp2, <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll index 8039bd096bcb8..8ebf713a671f4 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -35,6 +36,80 @@ define i1 @ptest_v16i1(ptr %a, ptr %b) { ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ptest_v16i1: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: mov w8, #255 // =0xff +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32] +; NONEON-NOSVE-NEXT: stp q1, q2, [sp] +; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #32] +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #40] +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: csel w9, w8, wzr, ne +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp] +; NONEON-NOSVE-NEXT: csetm w10, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: csetm w11, ne +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: csinv w11, w11, wzr, eq +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: csinv w11, w11, wzr, eq +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #32] +; NONEON-NOSVE-NEXT: csinv w11, w11, wzr, eq +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; 
NONEON-NOSVE-NEXT: csinv w11, w11, wzr, eq +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #48] +; NONEON-NOSVE-NEXT: csinv w11, w11, wzr, eq +; NONEON-NOSVE-NEXT: cmp w11, w10 +; NONEON-NOSVE-NEXT: csel w10, w11, w10, hi +; NONEON-NOSVE-NEXT: and w10, w10, #0xff +; NONEON-NOSVE-NEXT: cmp w10, w9 +; NONEON-NOSVE-NEXT: csel w9, w10, w9, hi +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: csel w10, w8, wzr, ne +; NONEON-NOSVE-NEXT: cmp w9, w10 +; NONEON-NOSVE-NEXT: csel w9, w9, w10, hi +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: csel w10, w8, wzr, ne +; NONEON-NOSVE-NEXT: cmp w9, w10 +; NONEON-NOSVE-NEXT: csel w9, w9, w10, hi +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: csel w10, w8, wzr, ne +; NONEON-NOSVE-NEXT: cmp w9, w10 +; NONEON-NOSVE-NEXT: csel w9, w9, w10, hi +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #16] +; NONEON-NOSVE-NEXT: csel w10, w8, wzr, ne +; NONEON-NOSVE-NEXT: cmp w9, w10 +; NONEON-NOSVE-NEXT: csel w9, w9, w10, hi +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: csel w10, w8, wzr, ne +; NONEON-NOSVE-NEXT: cmp w9, w10 +; NONEON-NOSVE-NEXT: csel w9, w9, w10, hi +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #24] +; NONEON-NOSVE-NEXT: csel w10, w8, wzr, ne +; NONEON-NOSVE-NEXT: cmp w9, w10 +; NONEON-NOSVE-NEXT: csel w9, w9, w10, hi +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: csel w10, w8, wzr, ne +; NONEON-NOSVE-NEXT: cmp w9, w10 +; NONEON-NOSVE-NEXT: csel w9, w9, w10, hi +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: csel w8, w8, wzr, ne +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: and w0, w8, #0x1 +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %v0 = bitcast ptr %a to ptr %v1 = load <16 x float>, ptr %v0, align 4 %v2 = fcmp une <16 x float> %v1, zeroinitializer @@ -92,6 +167,148 @@ 
define i1 @ptest_or_v16i1(ptr %a, ptr %b) { ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ptest_or_v16i1: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #128 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32] +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #48] +; NONEON-NOSVE-NEXT: str q2, [sp, #32] +; NONEON-NOSVE-NEXT: ldr s1, [sp, #52] +; NONEON-NOSVE-NEXT: ldr q0, [x1, #16] +; NONEON-NOSVE-NEXT: str q0, [sp, #96] +; NONEON-NOSVE-NEXT: ldp s2, s0, [sp, #96] +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #48] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: str q1, [sp, #16] +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, eq +; NONEON-NOSVE-NEXT: fcmp s2, #0.0 +; NONEON-NOSVE-NEXT: ldr s2, [sp, #12] +; NONEON-NOSVE-NEXT: csetm w9, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #24] +; NONEON-NOSVE-NEXT: csinv w9, w9, wzr, eq +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: csetm w10, ne +; NONEON-NOSVE-NEXT: fcmp s2, #0.0 +; NONEON-NOSVE-NEXT: csinv w10, w10, wzr, eq +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: csetm w11, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #16] +; NONEON-NOSVE-NEXT: csinv w11, w11, wzr, eq +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldp s2, s0, [sp] +; NONEON-NOSVE-NEXT: orr w10, w11, w10 +; NONEON-NOSVE-NEXT: csetm w12, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: csinv w12, w12, wzr, eq +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #104] +; NONEON-NOSVE-NEXT: csetm w13, ne +; NONEON-NOSVE-NEXT: fcmp s2, #0.0 +; NONEON-NOSVE-NEXT: csinv w14, w13, wzr, eq +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 
+; NONEON-NOSVE-NEXT: ldp s0, s2, [sp, #56] +; NONEON-NOSVE-NEXT: orr w12, w14, w12 +; NONEON-NOSVE-NEXT: orr w10, w12, w10 +; NONEON-NOSVE-NEXT: csetm w13, ne +; NONEON-NOSVE-NEXT: orr w9, w10, w9 +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldr q0, [x1, #32] +; NONEON-NOSVE-NEXT: str q0, [sp, #112] +; NONEON-NOSVE-NEXT: csinv w13, w13, wzr, eq +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: ldr s1, [sp, #64] +; NONEON-NOSVE-NEXT: csetm w15, ne +; NONEON-NOSVE-NEXT: fcmp s2, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s2, [sp, #112] +; NONEON-NOSVE-NEXT: csinv w15, w15, wzr, eq +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: and w11, w15, #0xff +; NONEON-NOSVE-NEXT: csetm w16, ne +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #68] +; NONEON-NOSVE-NEXT: csinv w16, w16, wzr, eq +; NONEON-NOSVE-NEXT: fcmp s2, #0.0 +; NONEON-NOSVE-NEXT: csetm w17, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s2, [sp, #120] +; NONEON-NOSVE-NEXT: csinv w17, w17, wzr, eq +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #76] +; NONEON-NOSVE-NEXT: csetm w18, ne +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: ldr q1, [x1, #48] +; NONEON-NOSVE-NEXT: str q1, [sp, #80] +; NONEON-NOSVE-NEXT: csinv w18, w18, wzr, eq +; NONEON-NOSVE-NEXT: fcmp s2, #0.0 +; NONEON-NOSVE-NEXT: ldr s2, [sp, #32] +; NONEON-NOSVE-NEXT: csetm w0, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #80] +; NONEON-NOSVE-NEXT: csinv w0, w0, wzr, eq +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: csetm w1, ne +; NONEON-NOSVE-NEXT: fcmp s2, #0.0 +; NONEON-NOSVE-NEXT: csinv w1, w1, wzr, eq +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #36] +; NONEON-NOSVE-NEXT: csetm w2, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s2, [sp, #88] +; NONEON-NOSVE-NEXT: csinv w2, w2, wzr, eq +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldr 
s0, [sp, #44] +; NONEON-NOSVE-NEXT: csetm w3, ne +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: csinv w3, w3, wzr, eq +; NONEON-NOSVE-NEXT: fcmp s2, #0.0 +; NONEON-NOSVE-NEXT: csetm w4, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: csinv w10, w4, wzr, eq +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi +; NONEON-NOSVE-NEXT: and w9, w13, #0xff +; NONEON-NOSVE-NEXT: and w10, w10, #0xff +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: and w9, w16, #0xff +; NONEON-NOSVE-NEXT: cmp w8, w11 +; NONEON-NOSVE-NEXT: csel w8, w8, w11, hi +; NONEON-NOSVE-NEXT: and w11, w17, #0xff +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: and w9, w18, #0xff +; NONEON-NOSVE-NEXT: cmp w8, w11 +; NONEON-NOSVE-NEXT: csel w8, w8, w11, hi +; NONEON-NOSVE-NEXT: and w11, w0, #0xff +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: and w9, w1, #0xff +; NONEON-NOSVE-NEXT: cmp w8, w11 +; NONEON-NOSVE-NEXT: csel w8, w8, w11, hi +; NONEON-NOSVE-NEXT: and w11, w2, #0xff +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: and w9, w3, #0xff +; NONEON-NOSVE-NEXT: cmp w8, w11 +; NONEON-NOSVE-NEXT: csel w8, w8, w11, hi +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi +; NONEON-NOSVE-NEXT: and w0, w8, #0x1 +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %v0 = bitcast ptr %a to ptr %v1 = load <16 x float>, ptr %v0, align 4 %v2 = fcmp une <16 x float> %v1, zeroinitializer @@ -159,6 +376,148 @@ define i1 @ptest_and_v16i1(ptr %a, ptr %b) { ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ptest_and_v16i1: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #128 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32] +; NONEON-NOSVE-NEXT: str q1, [sp] +; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #48] +; NONEON-NOSVE-NEXT: str q2, [sp, #32] +; NONEON-NOSVE-NEXT: ldr s1, [sp, #52] +; NONEON-NOSVE-NEXT: ldr q0, [x1, #16] +; NONEON-NOSVE-NEXT: str q0, [sp, #96] +; NONEON-NOSVE-NEXT: ldp s2, s0, [sp, #96] +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #48] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: str q1, [sp, #16] +; NONEON-NOSVE-NEXT: csel w8, w8, wzr, ne +; NONEON-NOSVE-NEXT: fcmp s2, #0.0 +; NONEON-NOSVE-NEXT: ldr s2, [sp, #12] +; NONEON-NOSVE-NEXT: csetm w9, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #24] +; NONEON-NOSVE-NEXT: csel w9, w9, wzr, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: csetm w10, ne +; NONEON-NOSVE-NEXT: fcmp s2, #0.0 +; NONEON-NOSVE-NEXT: csel w10, w10, wzr, ne +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: csetm w11, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #16] +; NONEON-NOSVE-NEXT: csel w11, w11, wzr, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldp s2, s0, [sp] +; NONEON-NOSVE-NEXT: and w10, w11, w10 +; NONEON-NOSVE-NEXT: csetm w12, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: csel w12, w12, wzr, ne +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #104] +; NONEON-NOSVE-NEXT: csetm w13, ne +; NONEON-NOSVE-NEXT: fcmp s2, #0.0 +; NONEON-NOSVE-NEXT: csel w14, w13, wzr, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s2, [sp, #56] +; NONEON-NOSVE-NEXT: and w12, w14, w12 +; NONEON-NOSVE-NEXT: and w10, w12, w10 +; NONEON-NOSVE-NEXT: csetm w13, ne +; NONEON-NOSVE-NEXT: and w9, w10, w9 +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 
+; NONEON-NOSVE-NEXT: ldr q0, [x1, #32] +; NONEON-NOSVE-NEXT: str q0, [sp, #112] +; NONEON-NOSVE-NEXT: csel w13, w13, wzr, ne +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: ldr s1, [sp, #64] +; NONEON-NOSVE-NEXT: csetm w15, ne +; NONEON-NOSVE-NEXT: fcmp s2, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s2, [sp, #112] +; NONEON-NOSVE-NEXT: csel w15, w15, wzr, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: and w11, w15, #0xff +; NONEON-NOSVE-NEXT: csetm w16, ne +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #68] +; NONEON-NOSVE-NEXT: csel w16, w16, wzr, ne +; NONEON-NOSVE-NEXT: fcmp s2, #0.0 +; NONEON-NOSVE-NEXT: csetm w17, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s2, [sp, #120] +; NONEON-NOSVE-NEXT: csel w17, w17, wzr, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #76] +; NONEON-NOSVE-NEXT: csetm w18, ne +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: ldr q1, [x1, #48] +; NONEON-NOSVE-NEXT: str q1, [sp, #80] +; NONEON-NOSVE-NEXT: csel w18, w18, wzr, ne +; NONEON-NOSVE-NEXT: fcmp s2, #0.0 +; NONEON-NOSVE-NEXT: ldr s2, [sp, #32] +; NONEON-NOSVE-NEXT: csetm w0, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #80] +; NONEON-NOSVE-NEXT: csel w0, w0, wzr, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: csetm w1, ne +; NONEON-NOSVE-NEXT: fcmp s2, #0.0 +; NONEON-NOSVE-NEXT: csel w1, w1, wzr, ne +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #36] +; NONEON-NOSVE-NEXT: csetm w2, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldp s0, s2, [sp, #88] +; NONEON-NOSVE-NEXT: csel w2, w2, wzr, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #44] +; NONEON-NOSVE-NEXT: csetm w3, ne +; NONEON-NOSVE-NEXT: fcmp s1, #0.0 +; NONEON-NOSVE-NEXT: csel w3, w3, wzr, ne +; NONEON-NOSVE-NEXT: fcmp s2, #0.0 +; NONEON-NOSVE-NEXT: csetm w4, ne +; NONEON-NOSVE-NEXT: fcmp s0, #0.0 +; 
NONEON-NOSVE-NEXT: csel w10, w4, wzr, ne +; NONEON-NOSVE-NEXT: cmp w9, w8 +; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo +; NONEON-NOSVE-NEXT: and w9, w13, #0xff +; NONEON-NOSVE-NEXT: and w10, w10, #0xff +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: and w9, w16, #0xff +; NONEON-NOSVE-NEXT: cmp w8, w11 +; NONEON-NOSVE-NEXT: csel w8, w8, w11, lo +; NONEON-NOSVE-NEXT: and w11, w17, #0xff +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: and w9, w18, #0xff +; NONEON-NOSVE-NEXT: cmp w8, w11 +; NONEON-NOSVE-NEXT: csel w8, w8, w11, lo +; NONEON-NOSVE-NEXT: and w11, w0, #0xff +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: and w9, w1, #0xff +; NONEON-NOSVE-NEXT: cmp w8, w11 +; NONEON-NOSVE-NEXT: csel w8, w8, w11, lo +; NONEON-NOSVE-NEXT: and w11, w2, #0xff +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: and w9, w3, #0xff +; NONEON-NOSVE-NEXT: cmp w8, w11 +; NONEON-NOSVE-NEXT: csel w8, w8, w11, lo +; NONEON-NOSVE-NEXT: cmp w8, w9 +; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo +; NONEON-NOSVE-NEXT: cmp w8, w10 +; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo +; NONEON-NOSVE-NEXT: and w0, w8, #0x1 +; NONEON-NOSVE-NEXT: add sp, sp, #128 +; NONEON-NOSVE-NEXT: ret %v0 = bitcast ptr %a to ptr %v1 = load <16 x float>, ptr %v0, align 4 %v2 = fcmp une <16 x float> %v1, zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll index 726fd28c90ae2..bc0fc7c79391d 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck 
%s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -18,6 +19,30 @@ define <4 x i8> @bitreverse_v4i8(<4 x i8> %op) { ; CHECK-NEXT: lsr z0.h, z0.h, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> %op) ret <4 x i8> %res } @@ -30,6 +55,46 @@ define <8 x i8> @bitreverse_v8i8(<8 x i8> %op) { ; CHECK-NEXT: rbit z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #15] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #9] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %op) ret <8 x i8> %res } @@ -42,6 +107,78 @@ define <16 x i8> @bitreverse_v16i8(<16 x i8> %op) { ; CHECK-NEXT: rbit z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 
+; NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %op) ret <16 x i8> %res } @@ -55,6 +192,144 @@ define void @bitreverse_v32i8(ptr %a) { ; CHECK-NEXT: rbit z1.b, p0/m, z1.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25] +; 
NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] 
+; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; 
NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %op) store <32 x i8> %res, ptr %a @@ -70,6 +345,21 @@ define <2 x i16> @bitreverse_v2i16(<2 x i16> %op) { ; CHECK-NEXT: lsr z0.s, z0.s, #16 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w9, w8, #16 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %op) ret <2 x i16> %res } @@ -82,6 +372,30 @@ define <4 x i16> @bitreverse_v4i16(<4 x i16> %op) { ; CHECK-NEXT: rbit z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> %op) ret <4 x i16> %res } @@ -94,6 +408,46 @@ define <8 x i16> @bitreverse_v8i16(<8 x i16> %op) { ; CHECK-NEXT: rbit z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %op) ret <8 x i16> %res } @@ -107,6 +461,80 @@ define void @bitreverse_v16i16(ptr %a) { ; CHECK-NEXT: rbit z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, 
#16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %op) store <16 x i16> %res, ptr %a @@ -121,6 +549,19 @@ define <2 x i32> @bitreverse_v2i32(<2 x i32> %op) { ; CHECK-NEXT: rbit z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: rbit w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %op) ret <2 x i32> %res } @@ -133,6 +574,24 @@ define <4 x i32> @bitreverse_v4i32(<4 x i32> %op) { ; CHECK-NEXT: rbit z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: rbit w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: rbit w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %op) ret <4 x i32> %res } @@ -146,6 +605,36 @@ define void @bitreverse_v8i32(ptr %a) { ; CHECK-NEXT: rbit z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: rbit w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: rbit w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: rbit w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: rbit w9, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: rbit w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %op) store <8 x i32> %res, ptr %a @@ -160,6 +649,17 
@@ define <1 x i64> @bitreverse_v1i64(<1 x i64> %op) { ; CHECK-NEXT: rbit z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: rbit x8, x8 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.bitreverse.v1i64(<1 x i64> %op) ret <1 x i64> %res } @@ -172,6 +672,19 @@ define <2 x i64> @bitreverse_v2i64(<2 x i64> %op) { ; CHECK-NEXT: rbit z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: rbit x9, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: rbit x8, x8 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %op) ret <2 x i64> %res } @@ -185,6 +698,26 @@ define void @bitreverse_v4i64(ptr %a) { ; CHECK-NEXT: rbit z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: rbit x9, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: rbit x8, x8 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: rbit x9, x8 +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: rbit x8, x8 +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %op) store <4 x i64> %res, ptr %a @@ -204,6 +737,35 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %op) { ; CHECK-NEXT: lsr z0.s, z0.s, #16 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #15] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #9] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: str d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #22] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i16> 
@llvm.bswap.v2i16(<2 x i16> %op) ret <2 x i16> %res } @@ -216,6 +778,30 @@ define <4 x i16> @bswap_v4i16(<4 x i16> %op) { ; CHECK-NEXT: revb z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #15] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #9] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %op) ret <4 x i16> %res } @@ -228,6 +814,46 @@ define <8 x i16> @bswap_v8i16(<8 x i16> %op) { ; CHECK-NEXT: revb z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %op) ret <8 x i16> %res } @@ -241,6 +867,83 @@ define void @bswap_v16i16(ptr %a) { ; CHECK-NEXT: revb z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: str q0, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #47] +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #40] +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %op) store <16 x i16> %res, ptr %a @@ -255,6 +958,30 @@ define <2 x i32> @bswap_v2i32(<2 x i32> %op) { ; CHECK-NEXT: revb z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #15] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #9] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %op) ret <2 x i32> %res } @@ -267,6 +994,46 @@ define <4 x i32> @bswap_v4i32(<4 x i32> %op) { ; CHECK-NEXT: revb z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %op) ret <4 x i32> %res } @@ -280,6 +1047,83 @@ define void @bswap_v8i32(ptr %a) { ; CHECK-NEXT: revb z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: str q0, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #47] +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #42] +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %op) store <8 x i32> %res, ptr %a @@ -294,6 +1138,30 @@ define <1 x i64> @bswap_v1i64(<1 x i64> %op) { ; CHECK-NEXT: revb z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #15] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #11] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #9] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.bswap.v1i64(<1 x i64> %op) ret <1 x i64> %res } @@ -306,6 +1174,46 @@ define <2 x i64> @bswap_v2i64(<2 x i64> %op) { ; CHECK-NEXT: revb z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %op) ret <2 x i64> %res } @@ -319,6 +1227,83 @@ define void @bswap_v4i64(ptr %a) { ; CHECK-NEXT: revb z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] +; NONEON-NOSVE-NEXT: str q0, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrb w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] +; NONEON-NOSVE-NEXT: strb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3] +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5] +; NONEON-NOSVE-NEXT: strb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7] +; NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #44] +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #46] +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #47] +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #33] +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #35] +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #36] +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #37] +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #39] +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %op) store <4 x i64> %res, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll index c022bf85e67e9..df019ce2e0ad6 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -14,6 +15,42 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1) { ; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v4i8: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #2] +; NONEON-NOSVE-NEXT: ldrh w12, [sp] +; NONEON-NOSVE-NEXT: sxtb w11, w8 +; NONEON-NOSVE-NEXT: sxtb w13, w9 +; NONEON-NOSVE-NEXT: sxtb w14, w10 +; NONEON-NOSVE-NEXT: sxtb w15, w12 +; NONEON-NOSVE-NEXT: ubfx w11, w11, #10, #5 +; NONEON-NOSVE-NEXT: ubfx w13, w13, #10, #5 +; NONEON-NOSVE-NEXT: ubfx w14, w14, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w11 +; NONEON-NOSVE-NEXT: ubfx w11, w15, #10, #5 +; NONEON-NOSVE-NEXT: add w9, w9, w13 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: add w10, w10, w14 +; NONEON-NOSVE-NEXT: sxtb w9, w9 +; NONEON-NOSVE-NEXT: add w11, w12, w11 +; NONEON-NOSVE-NEXT: sxtb w10, w10 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: sxtb w11, w11 +; NONEON-NOSVE-NEXT: lsr w9, w9, #5 +; NONEON-NOSVE-NEXT: lsr w10, w10, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #14] +; NONEON-NOSVE-NEXT: lsr w8, w11, #5 +; NONEON-NOSVE-NEXT: strh w9, [sp, #12] +; NONEON-NOSVE-NEXT: strh w10, [sp, #10] +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = sdiv <4 x i8> %op1, shufflevector (<4 x i8> insertelement (<4 x i8> poison, i8 32, i32 0), <4 x i8> poison, <4 x i32> zeroinitializer) ret <4 x i8> %res } @@ -26,6 +63,62 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) { ; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #7] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #15] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #6] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #5] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #4] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #3] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #11] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #2] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #1] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #9] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; 
NONEON-NOSVE-NEXT: ret %res = sdiv <8 x i8> %op1, shufflevector (<8 x i8> insertelement (<8 x i8> poison, i8 32, i32 0), <8 x i8> poison, <8 x i32> zeroinitializer) ret <8 x i8> %res } @@ -38,6 +131,110 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) { ; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #15] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #12] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #11] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #10] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #9] +; NONEON-NOSVE-NEXT: ubfx w9, w8, 
#10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #8] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #7] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #6] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #5] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #4] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #3] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #2] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #1] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sdiv <16 x i8> %op1, shufflevector (<16 x i8> insertelement (<16 x i8> poison, i8 32, i32 0), <16 x i8> poison, <16 x i32> zeroinitializer) ret <16 x i8> %res } @@ -51,6 +248,208 @@ define void @sdiv_v32i8(ptr %a) { ; CHECK-NEXT: asrd z1.b, p0/m, z1.b, #5 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #63] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #61] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, 
w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #59] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #57] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #23] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #55] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #22] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #21] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #53] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #20] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #19] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #51] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #18] +; NONEON-NOSVE-NEXT: ubfx w9, w8, 
#10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #49] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #15] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #14] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #13] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #12] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #11] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #10] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #9] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #8] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #7] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #6] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #5] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #4] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #3] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #2] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #1] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; 
NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrsb w8, [sp] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #10, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxtb w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %res = sdiv <32 x i8> %op1, shufflevector (<32 x i8> insertelement (<32 x i8> poison, i8 32, i32 0), <32 x i8> poison, <32 x i32> zeroinitializer) store <32 x i8> %res, ptr %a @@ -66,6 +465,24 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1) { ; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp] +; NONEON-NOSVE-NEXT: sxth w10, w8 +; NONEON-NOSVE-NEXT: sxth w11, w9 +; NONEON-NOSVE-NEXT: ubfx w10, w10, #26, #5 +; NONEON-NOSVE-NEXT: ubfx w11, w11, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w10 +; NONEON-NOSVE-NEXT: add w9, w9, w11 +; NONEON-NOSVE-NEXT: sbfx w8, w8, #5, #11 +; NONEON-NOSVE-NEXT: sbfx w9, w9, #5, #11 +; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = sdiv <2 x i16> %op1, shufflevector (<2 x i16> insertelement (<2 x i16> poison, i16 32, i32 0), <2 x i16> poison, <2 x i32> zeroinitializer) ret <2 x i16> %res } @@ -78,6 +495,38 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) { ; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #6] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #4] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #2] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = sdiv <4 x i16> %op1, shufflevector (<4 x i16> insertelement (<4 x i16> poison, i16 32, i32 0), <4 x i16> poison, <4 x i32> zeroinitializer) ret <4 x i16> %res } @@ -90,6 +539,62 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) { ; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #6] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #4] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #2] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; 
NONEON-NOSVE-NEXT: ret %res = sdiv <8 x i16> %op1, shufflevector (<8 x i16> insertelement (<8 x i16> poison, i16 32, i32 0), <8 x i16> poison, <8 x i32> zeroinitializer) ret <8 x i16> %res } @@ -103,6 +608,112 @@ define void @sdiv_v16i16(ptr %a) { ; CHECK-NEXT: asrd z1.h, p0/m, z1.h, #5 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #26] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #58] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #22] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #54] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] +; 
NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #50] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #6] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #4] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #2] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; 
NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrsh w8, [sp] +; NONEON-NOSVE-NEXT: ubfx w9, w8, #26, #5 +; NONEON-NOSVE-NEXT: add w8, w8, w9 +; NONEON-NOSVE-NEXT: sxth w8, w8 +; NONEON-NOSVE-NEXT: lsr w8, w8, #5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = sdiv <16 x i16> %op1, shufflevector (<16 x i16> insertelement (<16 x i16> poison, i16 32, i32 0), <16 x i16> poison, <16 x i32> zeroinitializer) store <16 x i16> %res, ptr %a @@ -117,6 +728,23 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) { ; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: asr w9, w8, #31 +; NONEON-NOSVE-NEXT: add w8, w8, w9, lsr #27 +; NONEON-NOSVE-NEXT: asr w10, w8, #5 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: asr w9, w8, #31 +; NONEON-NOSVE-NEXT: add w8, w8, w9, lsr #27 +; NONEON-NOSVE-NEXT: asr w8, w8, #5 +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = sdiv <2 x i32> %op1, shufflevector (<2 x i32> insertelement (<2 x i32> poison, i32 32, i32 0), <2 x i32> poison, <2 x i32> zeroinitializer) ret <2 x i32> %res } @@ -129,6 +757,32 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) { ; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: asr w9, w8, #31 +; NONEON-NOSVE-NEXT: add w8, w8, w9, lsr #27 +; NONEON-NOSVE-NEXT: asr w10, w8, #5 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: asr w9, w8, #31 +; NONEON-NOSVE-NEXT: add w8, w8, w9, lsr #27 +; NONEON-NOSVE-NEXT: asr w8, w8, #5 +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: asr w9, w8, #31 +; NONEON-NOSVE-NEXT: add w8, w8, w9, lsr #27 +; NONEON-NOSVE-NEXT: asr w10, w8, #5 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: asr w9, w8, #31 +; NONEON-NOSVE-NEXT: add w8, w8, w9, lsr #27 +; NONEON-NOSVE-NEXT: asr w8, w8, #5 +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sdiv <4 x i32> %op1, shufflevector (<4 x i32> insertelement (<4 x i32> poison, i32 32, i32 0), <4 x i32> poison, <4 x i32> zeroinitializer) ret <4 x i32> %res } @@ -142,6 +796,52 @@ define void @sdiv_v8i32(ptr %a) { ; CHECK-NEXT: asrd z1.s, p0/m, z1.s, #5 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: asr w9, w8, #31 +; NONEON-NOSVE-NEXT: add w8, w8, w9, lsr #27 +; NONEON-NOSVE-NEXT: asr w10, w8, #5 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: asr w9, w8, #31 +; NONEON-NOSVE-NEXT: add w8, w8, w9, lsr #27 +; NONEON-NOSVE-NEXT: asr w8, w8, #5 +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: asr w9, w8, #31 +; NONEON-NOSVE-NEXT: add w8, w8, w9, lsr #27 +; NONEON-NOSVE-NEXT: asr w10, w8, #5 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: asr w9, w8, #31 +; NONEON-NOSVE-NEXT: add w8, w8, w9, lsr #27 +; NONEON-NOSVE-NEXT: asr w8, w8, #5 +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: asr w9, w8, #31 +; NONEON-NOSVE-NEXT: add w8, w8, w9, lsr #27 +; NONEON-NOSVE-NEXT: asr w10, w8, #5 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: asr w9, w8, #31 +; NONEON-NOSVE-NEXT: add w8, w8, w9, lsr #27 +; NONEON-NOSVE-NEXT: asr w8, w8, #5 +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #4] +; NONEON-NOSVE-NEXT: asr w9, w8, #31 +; NONEON-NOSVE-NEXT: add w8, w8, w9, lsr #27 +; NONEON-NOSVE-NEXT: asr w10, w8, #5 +; NONEON-NOSVE-NEXT: ldr w8, [sp] +; NONEON-NOSVE-NEXT: asr w9, w8, #31 +; NONEON-NOSVE-NEXT: add w8, w8, w9, lsr #27 +; NONEON-NOSVE-NEXT: asr w8, w8, #5 +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = sdiv <8 x i32> %op1, shufflevector (<8 x i32> insertelement (<8 x i32> poison, i32 32, i32 0), <8 x i32> poison, <8 x i32> zeroinitializer) store <8 x i32> %res, ptr %a @@ -156,6 +856,19 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) { ; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5 ; CHECK-NEXT: // kill: def 
$d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: asr x9, x8, #63 +; NONEON-NOSVE-NEXT: add x8, x8, x9, lsr #59 +; NONEON-NOSVE-NEXT: asr x8, x8, #5 +; NONEON-NOSVE-NEXT: str x8, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = sdiv <1 x i64> %op1, shufflevector (<1 x i64> insertelement (<1 x i64> poison, i64 32, i32 0), <1 x i64> poison, <1 x i32> zeroinitializer) ret <1 x i64> %res } @@ -169,6 +882,23 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) { ; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: asr x9, x8, #63 +; NONEON-NOSVE-NEXT: add x8, x8, x9, lsr #59 +; NONEON-NOSVE-NEXT: asr x10, x8, #5 +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: asr x9, x8, #63 +; NONEON-NOSVE-NEXT: add x8, x8, x9, lsr #59 +; NONEON-NOSVE-NEXT: asr x8, x8, #5 +; NONEON-NOSVE-NEXT: stp x8, x10, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = sdiv <2 x i64> %op1, shufflevector (<2 x i64> insertelement (<2 x i64> poison, i64 32, i32 0), <2 x i64> poison, <2 x i32> zeroinitializer) ret <2 x i64> %res } @@ -182,6 +912,34 @@ define void @sdiv_v4i64(ptr %a) { ; CHECK-NEXT: asrd z1.d, p0/m, z1.d, #5 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] +; NONEON-NOSVE-NEXT: asr x9, x8, #63 +; NONEON-NOSVE-NEXT: add x8, x8, x9, lsr #59 +; NONEON-NOSVE-NEXT: asr x10, x8, #5 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: asr x9, x8, #63 +; NONEON-NOSVE-NEXT: add x8, x8, x9, lsr #59 +; NONEON-NOSVE-NEXT: asr x8, x8, #5 +; NONEON-NOSVE-NEXT: stp x8, x10, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: asr x9, x8, #63 +; NONEON-NOSVE-NEXT: add x8, x8, x9, lsr #59 +; NONEON-NOSVE-NEXT: asr x10, x8, #5 +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: asr x9, x8, #63 +; NONEON-NOSVE-NEXT: add x8, x8, x9, lsr #59 +; NONEON-NOSVE-NEXT: asr x8, x8, #5 +; NONEON-NOSVE-NEXT: stp x8, x10, [sp, #32] +; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %res = sdiv <4 x i64> %op1, shufflevector (<4 x i64> insertelement (<4 x i64> poison, i64 32, i32 0), <4 x i64> poison, <4 x i32> zeroinitializer) store <4 x i64> %res, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll index 38aaf860b7298..b66e6d9013573 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll @@ -18,9 +18,15 @@ define void @hang_when_merging_stores_after_legalisation(ptr %a, <2 x i32> %b) { ; ; NONEON-NOSVE-LABEL: hang_when_merging_stores_after_legalisation: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 -; NONEON-NOSVE-NEXT: dup v0.4s, v0.s[0] +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w8, [sp, #24] +; NONEON-NOSVE-NEXT: stp 
w8, w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] ; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #32 ; NONEON-NOSVE-NEXT: ret %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <8 x i32> zeroinitializer %interleaved.vec = shufflevector <8 x i32> %splat, <8 x i32> undef, <8 x i32> @@ -39,9 +45,25 @@ define void @interleave_store_without_splat(ptr %a, <4 x i32> %v1, <4 x i32> %v2 ; ; NONEON-NOSVE-LABEL: interleave_store_without_splat: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: zip2 v2.4s, v0.4s, v1.4s -; NONEON-NOSVE-NEXT: zip1 v0.4s, v0.4s, v1.4s -; NONEON-NOSVE-NEXT: stp q0, q2, [x0] +; NONEON-NOSVE-NEXT: sub sp, sp, #64 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #36] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldr q1, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #64 ; NONEON-NOSVE-NEXT: ret %shuffle = shufflevector <4 x i32> %v1, <4 x i32> %v2, <8 x i32> %interleaved = shufflevector <8 x i32> %shuffle, <8 x i32> undef, <8 x i32> @@ -64,12 +86,40 @@ define void @interleave_store_legalization(ptr %a, <8 x i32> %v1, <8 x i32> %v2) ; ; NONEON-NOSVE-LABEL: interleave_store_legalization: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: zip2 v4.4s, v1.4s, v3.4s -; NONEON-NOSVE-NEXT: zip1 v1.4s, v1.4s, v3.4s -; NONEON-NOSVE-NEXT: zip2 v3.4s, v0.4s, v2.4s -; NONEON-NOSVE-NEXT: zip1 v0.4s, v0.4s, v2.4s -; NONEON-NOSVE-NEXT: stp q1, q4, [x0, #32] -; 
NONEON-NOSVE-NEXT: stp q0, q3, [x0] +; NONEON-NOSVE-NEXT: sub sp, sp, #128 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128 +; NONEON-NOSVE-NEXT: stp q1, q3, [sp, #16] +; NONEON-NOSVE-NEXT: stp q0, q2, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #100] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #96] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #108] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #104] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #36] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] +; NONEON-NOSVE-NEXT: ldr q3, [sp, #112] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #44] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldr q1, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: ldp q0, q2, [sp, #48] +; NONEON-NOSVE-NEXT: stp q2, q3, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #128 ; NONEON-NOSVE-NEXT: ret %interleaved.vec = shufflevector <8 x i32> %v1, <8 x i32> %v2, <16 x i32> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll index 649b13fa8a1e3..a4cf5d608fed6 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE @@ -15,6 +16,18 @@ define <4 x i8> @splat_v4i8(i8 %a) { ; CHECK-NEXT: mov z0.h, w0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: strh w0, [sp, #14] +; NONEON-NOSVE-NEXT: strh w0, [sp, #12] +; NONEON-NOSVE-NEXT: strh w0, [sp, #10] +; NONEON-NOSVE-NEXT: strh w0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x i8> undef, i8 %a, i64 0 %splat = shufflevector <4 x i8> %insert, <4 x i8> undef, <4 x i32> zeroinitializer ret <4 x i8> %splat @@ -26,6 +39,22 @@ define <8 x i8> @splat_v8i8(i8 %a) { ; CHECK-NEXT: mov z0.b, w0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: strb w0, [sp, #15] +; NONEON-NOSVE-NEXT: strb w0, [sp, #14] +; NONEON-NOSVE-NEXT: strb w0, [sp, #13] +; NONEON-NOSVE-NEXT: strb w0, [sp, #12] +; NONEON-NOSVE-NEXT: strb w0, [sp, #11] +; NONEON-NOSVE-NEXT: strb w0, [sp, #10] +; NONEON-NOSVE-NEXT: strb w0, [sp, #9] +; NONEON-NOSVE-NEXT: strb w0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <8 x i8> undef, i8 %a, i64 0 %splat = shufflevector <8 x i8> %insert, <8 x i8> undef, <8 x i32> zeroinitializer ret <8 x i8> %splat @@ -37,6 +66,29 @@ define <16 x i8> @splat_v16i8(i8 %a) { ; CHECK-NEXT: mov z0.b, w0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v16i8: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: strb w0, [sp, #15] +; NONEON-NOSVE-NEXT: strb w0, [sp, #14] +; NONEON-NOSVE-NEXT: strb w0, [sp, #13] +; NONEON-NOSVE-NEXT: strb w0, [sp, #12] +; NONEON-NOSVE-NEXT: strb w0, [sp, #11] +; NONEON-NOSVE-NEXT: strb w0, [sp, #10] +; NONEON-NOSVE-NEXT: strb w0, [sp, #9] +; NONEON-NOSVE-NEXT: strb w0, [sp, #8] +; NONEON-NOSVE-NEXT: strb w0, [sp, #7] +; NONEON-NOSVE-NEXT: strb w0, [sp, #6] +; NONEON-NOSVE-NEXT: strb w0, [sp, #5] +; NONEON-NOSVE-NEXT: strb w0, [sp, #4] +; NONEON-NOSVE-NEXT: strb w0, [sp, #3] +; NONEON-NOSVE-NEXT: strb w0, [sp, #2] +; NONEON-NOSVE-NEXT: strb w0, [sp, #1] +; NONEON-NOSVE-NEXT: strb w0, [sp] +; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <16 x i8> undef, i8 %a, i64 0 %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ret <16 x i8> %splat @@ -48,6 +100,31 @@ define void @splat_v32i8(i8 %a, ptr %b) { ; CHECK-NEXT: mov z0.b, w0 ; CHECK-NEXT: stp q0, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: strb w0, [sp, #15] +; NONEON-NOSVE-NEXT: strb w0, [sp, #14] +; NONEON-NOSVE-NEXT: strb w0, [sp, #13] +; NONEON-NOSVE-NEXT: strb w0, [sp, #12] +; NONEON-NOSVE-NEXT: strb w0, [sp, #11] +; NONEON-NOSVE-NEXT: strb w0, [sp, #10] +; NONEON-NOSVE-NEXT: strb w0, [sp, #9] +; NONEON-NOSVE-NEXT: strb w0, [sp, #8] +; NONEON-NOSVE-NEXT: strb w0, [sp, #7] +; NONEON-NOSVE-NEXT: strb w0, [sp, #6] +; NONEON-NOSVE-NEXT: strb w0, [sp, #5] +; NONEON-NOSVE-NEXT: strb w0, [sp, #4] +; NONEON-NOSVE-NEXT: strb w0, [sp, #3] +; NONEON-NOSVE-NEXT: strb w0, [sp, #2] +; NONEON-NOSVE-NEXT: strb w0, [sp, #1] +; NONEON-NOSVE-NEXT: strb w0, [sp] +; NONEON-NOSVE-NEXT: ldr q0, [sp] +; NONEON-NOSVE-NEXT: stp q0, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; 
NONEON-NOSVE-NEXT: ret %insert = insertelement <32 x i8> undef, i8 %a, i64 0 %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer store <32 x i8> %splat, ptr %b @@ -60,6 +137,15 @@ define <2 x i16> @splat_v2i16(i16 %a) { ; CHECK-NEXT: mov z0.s, w0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: stp w0, w0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <2 x i16> undef, i16 %a, i64 0 %splat = shufflevector <2 x i16> %insert, <2 x i16> undef, <2 x i32> zeroinitializer ret <2 x i16> %splat @@ -71,6 +157,18 @@ define <4 x i16> @splat_v4i16(i16 %a) { ; CHECK-NEXT: mov z0.h, w0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: strh w0, [sp, #14] +; NONEON-NOSVE-NEXT: strh w0, [sp, #12] +; NONEON-NOSVE-NEXT: strh w0, [sp, #10] +; NONEON-NOSVE-NEXT: strh w0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x i16> undef, i16 %a, i64 0 %splat = shufflevector <4 x i16> %insert, <4 x i16> undef, <4 x i32> zeroinitializer ret <4 x i16> %splat @@ -82,6 +180,21 @@ define <8 x i16> @splat_v8i16(i16 %a) { ; CHECK-NEXT: mov z0.h, w0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: strh w0, [sp, #14] +; NONEON-NOSVE-NEXT: strh w0, [sp, #12] +; NONEON-NOSVE-NEXT: strh w0, [sp, #10] +; NONEON-NOSVE-NEXT: strh 
w0, [sp, #8] +; NONEON-NOSVE-NEXT: strh w0, [sp, #6] +; NONEON-NOSVE-NEXT: strh w0, [sp, #4] +; NONEON-NOSVE-NEXT: strh w0, [sp, #2] +; NONEON-NOSVE-NEXT: strh w0, [sp] +; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <8 x i16> undef, i16 %a, i64 0 %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ret <8 x i16> %splat @@ -93,6 +206,23 @@ define void @splat_v16i16(i16 %a, ptr %b) { ; CHECK-NEXT: mov z0.h, w0 ; CHECK-NEXT: stp q0, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: strh w0, [sp, #14] +; NONEON-NOSVE-NEXT: strh w0, [sp, #12] +; NONEON-NOSVE-NEXT: strh w0, [sp, #10] +; NONEON-NOSVE-NEXT: strh w0, [sp, #8] +; NONEON-NOSVE-NEXT: strh w0, [sp, #6] +; NONEON-NOSVE-NEXT: strh w0, [sp, #4] +; NONEON-NOSVE-NEXT: strh w0, [sp, #2] +; NONEON-NOSVE-NEXT: strh w0, [sp] +; NONEON-NOSVE-NEXT: ldr q0, [sp] +; NONEON-NOSVE-NEXT: stp q0, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <16 x i16> undef, i16 %a, i64 0 %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer store <16 x i16> %splat, ptr %b @@ -105,6 +235,15 @@ define <2 x i32> @splat_v2i32(i32 %a) { ; CHECK-NEXT: mov z0.s, w0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: stp w0, w0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <2 x i32> undef, i32 %a, i64 0 %splat = shufflevector <2 x i32> %insert, <2 x i32> undef, <2 x i32> zeroinitializer ret <2 x i32> %splat @@ -116,6 +255,15 @@ define <4 x i32> @splat_v4i32(i32 %a) { ; 
CHECK-NEXT: mov z0.s, w0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: stp w0, w0, [sp, #8] +; NONEON-NOSVE-NEXT: stp w0, w0, [sp] +; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x i32> undef, i32 %a, i64 0 %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat @@ -127,6 +275,17 @@ define void @splat_v8i32(i32 %a, ptr %b) { ; CHECK-NEXT: mov z0.s, w0 ; CHECK-NEXT: stp q0, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: stp w0, w0, [sp, #8] +; NONEON-NOSVE-NEXT: stp w0, w0, [sp] +; NONEON-NOSVE-NEXT: ldr q0, [sp] +; NONEON-NOSVE-NEXT: stp q0, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <8 x i32> undef, i32 %a, i64 0 %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer store <8 x i32> %splat, ptr %b @@ -139,6 +298,15 @@ define <1 x i64> @splat_v1i64(i64 %a) { ; CHECK-NEXT: mov z0.d, x0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str x0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <1 x i64> undef, i64 %a, i64 0 %splat = shufflevector <1 x i64> %insert, <1 x i64> undef, <1 x i32> zeroinitializer ret <1 x i64> %splat @@ -150,6 +318,13 @@ define <2 x i64> @splat_v2i64(i64 %a) { ; CHECK-NEXT: mov z0.d, x0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: 
ret +; +; NONEON-NOSVE-LABEL: splat_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp x0, x0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <2 x i64> undef, i64 %a, i64 0 %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat @@ -161,6 +336,15 @@ define void @splat_v4i64(i64 %a, ptr %b) { ; CHECK-NEXT: mov z0.d, x0 ; CHECK-NEXT: stp q0, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp x0, x0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr q0, [sp] +; NONEON-NOSVE-NEXT: stp q0, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x i64> undef, i64 %a, i64 0 %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer store <4 x i64> %splat, ptr %b @@ -178,6 +362,16 @@ define <2 x half> @splat_v2f16(half %a) { ; CHECK-NEXT: mov z0.h, h0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <2 x half> undef, half %a, i64 0 %splat = shufflevector <2 x half> %insert, <2 x half> undef, <2 x i32> zeroinitializer ret <2 x half> %splat @@ -190,6 +384,18 @@ define <4 x half> @splat_v4f16(half %a) { ; CHECK-NEXT: mov z0.h, h0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str h0, [sp, 
#14] +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x half> undef, half %a, i64 0 %splat = shufflevector <4 x half> %insert, <4 x half> undef, <4 x i32> zeroinitializer ret <4 x half> %splat @@ -202,6 +408,21 @@ define <8 x half> @splat_v8f16(half %a) { ; CHECK-NEXT: mov z0.h, h0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: str h0, [sp, #6] +; NONEON-NOSVE-NEXT: str h0, [sp, #4] +; NONEON-NOSVE-NEXT: str h0, [sp, #2] +; NONEON-NOSVE-NEXT: str h0, [sp] +; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <8 x half> undef, half %a, i64 0 %splat = shufflevector <8 x half> %insert, <8 x half> undef, <8 x i32> zeroinitializer ret <8 x half> %splat @@ -214,6 +435,23 @@ define void @splat_v16f16(half %a, ptr %b) { ; CHECK-NEXT: mov z0.h, h0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str h0, [sp, #14] +; NONEON-NOSVE-NEXT: str h0, [sp, #12] +; NONEON-NOSVE-NEXT: str h0, [sp, #10] +; NONEON-NOSVE-NEXT: str h0, [sp, #8] +; NONEON-NOSVE-NEXT: str h0, [sp, #6] +; NONEON-NOSVE-NEXT: str h0, [sp, #4] +; NONEON-NOSVE-NEXT: str h0, [sp, #2] +; NONEON-NOSVE-NEXT: str h0, [sp] +; NONEON-NOSVE-NEXT: ldr q0, [sp] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %insert = 
insertelement <16 x half> undef, half %a, i64 0 %splat = shufflevector <16 x half> %insert, <16 x half> undef, <16 x i32> zeroinitializer store <16 x half> %splat, ptr %b @@ -227,6 +465,15 @@ define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) { ; CHECK-NEXT: mov z0.s, s0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: stp s0, s0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <2 x float> undef, float %a, i64 0 %splat = shufflevector <2 x float> %insert, <2 x float> undef, <2 x i32> zeroinitializer ret <2 x float> %splat @@ -239,6 +486,15 @@ define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) { ; CHECK-NEXT: mov z0.s, s0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: stp s0, s0, [sp, #8] +; NONEON-NOSVE-NEXT: stp s0, s0, [sp] +; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x float> undef, float %a, i64 0 %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer ret <4 x float> %splat @@ -251,6 +507,17 @@ define void @splat_v8f32(float %a, ptr %b) { ; CHECK-NEXT: mov z0.s, s0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: stp s0, s0, [sp, #8] +; NONEON-NOSVE-NEXT: stp s0, s0, [sp] +; NONEON-NOSVE-NEXT: ldr q0, [sp] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %insert = 
insertelement <8 x float> undef, float %a, i64 0 %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer store <8 x float> %splat, ptr %b @@ -261,6 +528,15 @@ define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) { ; CHECK-LABEL: splat_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <1 x double> undef, double %a, i64 0 %splat = shufflevector <1 x double> %insert, <1 x double> undef, <1 x i32> zeroinitializer ret <1 x double> %splat @@ -273,6 +549,13 @@ define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) { ; CHECK-NEXT: mov z0.d, d0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp d0, d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <2 x double> undef, double %a, i64 0 %splat = shufflevector <2 x double> %insert, <2 x double> undef, <2 x i32> zeroinitializer ret <2 x double> %splat @@ -285,6 +568,15 @@ define void @splat_v4f64(double %a, ptr %b) { ; CHECK-NEXT: mov z0.d, d0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp d0, d0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr q0, [sp] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x double> undef, double %a, i64 0 %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer store <4 x double> %splat, ptr %b @@ -301,6 +593,13 @@ define void @splat_imm_v32i8(ptr %a) { ; CHECK-NEXT: mov z0.b, #1 // =0x1 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_imm_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI24_0 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI24_0] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <32 x i8> undef, i8 1, i64 0 %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer store <32 x i8> %splat, ptr %a @@ -313,6 +612,13 @@ define void @splat_imm_v16i16(ptr %a) { ; CHECK-NEXT: mov z0.h, #2 // =0x2 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_imm_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI25_0 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI25_0] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <16 x i16> undef, i16 2, i64 0 %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer store <16 x i16> %splat, ptr %a @@ -325,6 +631,13 @@ define void @splat_imm_v8i32(ptr %a) { ; CHECK-NEXT: mov z0.s, #3 // =0x3 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_imm_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI26_0 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI26_0] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <8 x i32> undef, i32 3, i64 0 %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer store <8 x i32> %splat, ptr 
%a @@ -337,6 +650,13 @@ define void @splat_imm_v4i64(ptr %a) { ; CHECK-NEXT: mov z0.d, #4 // =0x4 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_imm_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI27_0 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI27_0] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x i64> undef, i64 4, i64 0 %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer store <4 x i64> %splat, ptr %a @@ -353,6 +673,13 @@ define void @splat_imm_v16f16(ptr %a) { ; CHECK-NEXT: fmov z0.h, #5.00000000 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_imm_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI28_0 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI28_0] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <16 x half> undef, half 5.0, i64 0 %splat = shufflevector <16 x half> %insert, <16 x half> undef, <16 x i32> zeroinitializer store <16 x half> %splat, ptr %a @@ -365,6 +692,13 @@ define void @splat_imm_v8f32(ptr %a) { ; CHECK-NEXT: fmov z0.s, #6.00000000 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_imm_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI29_0 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI29_0] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <8 x float> undef, float 6.0, i64 0 %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer store <8 x float> %splat, ptr %a @@ -377,6 +711,13 @@ define void @splat_imm_v4f64(ptr %a) { ; CHECK-NEXT: fmov z0.d, #7.00000000 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_imm_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI30_0 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI30_0] +; NONEON-NOSVE-NEXT: stp q0, 
q0, [x0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x double> undef, double 7.0, i64 0 %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer store <4 x double> %splat, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll index c7435bdbec949..a77ac7832e17c 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -12,6 +13,11 @@ define void @store_v4i8(ptr %a) { ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: st1b { z0.h }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str wzr, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x i8> zeroinitializer, ptr %a ret void } @@ -22,6 +28,13 @@ define void @store_v8i8(ptr %a) { ; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI1_0 +; NONEON-NOSVE-NEXT: ldr d0, [x8, :lo12:.LCPI1_0] +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret store <8 x i8> zeroinitializer, ptr %a ret void } @@ -32,6 +45,13 @@ define void @store_v16i8(ptr %a) { ; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI2_0 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI2_0] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret 
store <16 x i8> zeroinitializer, ptr %a ret void } @@ -42,6 +62,13 @@ define void @store_v32i8(ptr %a) { ; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI3_0 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI3_0] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <32 x i8> zeroinitializer, ptr %a ret void } @@ -53,6 +80,11 @@ define void @store_v2i16(ptr %a) { ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: st1h { z0.s }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str wzr, [x0] +; NONEON-NOSVE-NEXT: ret store <2 x i16> zeroinitializer, ptr %a ret void } @@ -64,6 +96,18 @@ define void @store_v2f16(ptr %a) { ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI5_0 +; NONEON-NOSVE-NEXT: ldr d0, [x8, :lo12:.LCPI5_0] +; NONEON-NOSVE-NEXT: str d0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: str w8, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret store <2 x half> zeroinitializer, ptr %a ret void } @@ -74,6 +118,13 @@ define void @store_v4i16(ptr %a) { ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI6_0 +; NONEON-NOSVE-NEXT: ldr d0, [x8, :lo12:.LCPI6_0] +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x i16> zeroinitializer, ptr %a ret void } @@ -84,6 +135,13 @@ define void @store_v4f16(ptr %a) { ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v4f16: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: adrp x8, .LCPI7_0 +; NONEON-NOSVE-NEXT: ldr d0, [x8, :lo12:.LCPI7_0] +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x half> zeroinitializer, ptr %a ret void } @@ -94,6 +152,13 @@ define void @store_v8i16(ptr %a) { ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI8_0 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI8_0] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret store <8 x i16> zeroinitializer, ptr %a ret void } @@ -104,6 +169,13 @@ define void @store_v8f16(ptr %a) { ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI9_0 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI9_0] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret store <8 x half> zeroinitializer, ptr %a ret void } @@ -114,6 +186,13 @@ define void @store_v16i16(ptr %a) { ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI10_0 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI10_0] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <16 x i16> zeroinitializer, ptr %a ret void } @@ -124,6 +203,13 @@ define void @store_v16f16(ptr %a) { ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI11_0 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI11_0] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <16 x half> zeroinitializer, ptr %a ret void } @@ -133,6 +219,11 @@ define void @store_v2i32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: str xzr, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
store_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str xzr, [x0] +; NONEON-NOSVE-NEXT: ret store <2 x i32> zeroinitializer, ptr %a ret void } @@ -142,6 +233,11 @@ define void @store_v2f32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: str xzr, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str xzr, [x0] +; NONEON-NOSVE-NEXT: ret store <2 x float> zeroinitializer, ptr %a ret void } @@ -151,6 +247,11 @@ define void @store_v4i32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: stp xzr, xzr, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp xzr, xzr, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x i32> zeroinitializer, ptr %a ret void } @@ -160,6 +261,11 @@ define void @store_v4f32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: stp xzr, xzr, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp xzr, xzr, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x float> zeroinitializer, ptr %a ret void } @@ -170,6 +276,13 @@ define void @store_v8i32(ptr %a) { ; CHECK-NEXT: mov z0.s, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI16_0 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI16_0] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <8 x i32> zeroinitializer, ptr %a ret void } @@ -180,6 +293,13 @@ define void @store_v8f32(ptr %a) { ; CHECK-NEXT: mov z0.s, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI17_0 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI17_0] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <8 x float> zeroinitializer, ptr %a ret void } @@ -190,6 +310,16 @@ define void @store_v1i64(ptr %a) { ; CHECK-NEXT: mov z0.d, #0 // =0x0 
; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str xzr, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret store <1 x i64> zeroinitializer, ptr %a ret void } @@ -200,6 +330,16 @@ define void @store_v1f64(ptr %a) { ; CHECK-NEXT: fmov d0, xzr ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str xzr, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret store <1 x double> zeroinitializer, ptr %a ret void } @@ -209,6 +349,11 @@ define void @store_v2i64(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: stp xzr, xzr, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp xzr, xzr, [x0] +; NONEON-NOSVE-NEXT: ret store <2 x i64> zeroinitializer, ptr %a ret void } @@ -218,6 +363,11 @@ define void @store_v2f64(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: stp xzr, xzr, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp xzr, xzr, [x0] +; NONEON-NOSVE-NEXT: ret store <2 x double> zeroinitializer, ptr %a ret void } @@ -228,6 +378,13 @@ define void @store_v4i64(ptr %a) { ; CHECK-NEXT: mov z0.d, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI22_0 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI22_0] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x i64> zeroinitializer, ptr %a ret void } @@ -238,6 +395,13 @@ define 
void @store_v4f64(ptr %a) { ; CHECK-NEXT: mov z0.d, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI23_0 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI23_0] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x double> zeroinitializer, ptr %a ret void } diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll index 9e04fc236836c..a9f4d92b1e6b6 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE ; Test we can code generater patterns of the form: @@ -23,6 +24,16 @@ define void @subvector_v4i8(ptr %in, ptr %out) { ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] ; CHECK-NEXT: st1b { z0.h }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v4i8: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldrh w8, [x0, #2] +; NONEON-NOSVE-NEXT: ldrb w9, [x0, #1] +; NONEON-NOSVE-NEXT: ldrb w10, [x0] +; NONEON-NOSVE-NEXT: strh w8, [x1, #2] +; NONEON-NOSVE-NEXT: strb w9, [x1, #1] +; NONEON-NOSVE-NEXT: strb w10, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <4 x i8>, ptr %in br label %bb1 @@ -37,6 +48,12 @@ define void @subvector_v8i8(ptr %in, ptr %out) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v8i8: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <8 x i8>, ptr 
%in br label %bb1 @@ -51,6 +68,12 @@ define void @subvector_v16i8(ptr %in, ptr %out) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v16i8: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <16 x i8>, ptr %in br label %bb1 @@ -65,6 +88,12 @@ define void @subvector_v32i8(ptr %in, ptr %out) { ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v32i8: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in br label %bb1 @@ -81,6 +110,12 @@ define void @subvector_v2i16(ptr %in, ptr %out) { ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] ; CHECK-NEXT: st1h { z0.s }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v2i16: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: str w8, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <2 x i16>, ptr %in br label %bb1 @@ -95,6 +130,12 @@ define void @subvector_v4i16(ptr %in, ptr %out) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v4i16: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <4 x i16>, ptr %in br label %bb1 @@ -109,6 +150,12 @@ define void @subvector_v8i16(ptr %in, ptr %out) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v8i16: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <8 x i16>, ptr %in br label %bb1 @@ -123,6 +170,12 @@ define void @subvector_v16i16(ptr %in, ptr %out) { ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: 
ret +; +; NONEON-NOSVE-LABEL: subvector_v16i16: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <16 x i16>, ptr %in br label %bb1 @@ -138,6 +191,12 @@ define void @subvector_v2i32(ptr %in, ptr %out) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v2i32: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <2 x i32>, ptr %in br label %bb1 @@ -152,6 +211,12 @@ define void @subvector_v4i32(ptr %in, ptr %out) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v4i32: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <4 x i32>, ptr %in br label %bb1 @@ -166,6 +231,12 @@ define void @subvector_v8i32(ptr %in, ptr %out) { ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v8i32: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <8 x i32>, ptr %in br label %bb1 @@ -181,6 +252,12 @@ define void @subvector_v2i64(ptr %in, ptr %out) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v2i64: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <2 x i64>, ptr %in br label %bb1 @@ -195,6 +272,12 @@ define void @subvector_v4i64(ptr %in, ptr %out) { ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v4i64: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; 
NONEON-NOSVE-NEXT: ret %a = load <4 x i64>, ptr %in br label %bb1 @@ -210,6 +293,12 @@ define void @subvector_v2f16(ptr %in, ptr %out) { ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v2f16: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: str w8, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <2 x half>, ptr %in br label %bb1 @@ -224,6 +313,12 @@ define void @subvector_v4f16(ptr %in, ptr %out) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v4f16: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <4 x half>, ptr %in br label %bb1 @@ -238,6 +333,12 @@ define void @subvector_v8f16(ptr %in, ptr %out) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v8f16: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <8 x half>, ptr %in br label %bb1 @@ -252,6 +353,12 @@ define void @subvector_v16f16(ptr %in, ptr %out) { ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v16f16: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <16 x half>, ptr %in br label %bb1 @@ -267,6 +374,12 @@ define void @subvector_v2f32(ptr %in, ptr %out) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v2f32: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <2 x float>, ptr %in br label %bb1 @@ -281,6 +394,12 @@ define void @subvector_v4f32(ptr %in, ptr %out) { ; CHECK-NEXT: ldr q0, [x0] ; 
CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v4f32: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <4 x float>, ptr %in br label %bb1 @@ -295,6 +414,12 @@ define void @subvector_v8f32(ptr %in, ptr %out) { ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v8f32: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <8 x float>,ptr %in br label %bb1 @@ -310,6 +435,12 @@ define void @subvector_v2f64(ptr %in, ptr %out) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v2f64: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <2 x double>, ptr %in br label %bb1 @@ -324,6 +455,12 @@ define void @subvector_v4f64(ptr %in, ptr %out) { ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v4f64: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <4 x double>, ptr %in br label %bb1 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll index b34fe438a063a..30682751037fe 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc 
-force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -12,6 +13,32 @@ define void @store_trunc_v8i16i8(ptr %ap, ptr %dest) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: st1b { z0.h }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_trunc_v8i16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #27] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %a = load <8 x i16>, ptr %ap %val = trunc <8 x i16> %a to <8 x i8> store <8 x i8> %val, ptr %dest @@ -25,6 +52,20 @@ define void @store_trunc_v4i32i8(ptr %ap, ptr %dest) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: st1b { z0.s }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_trunc_v4i32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w10, w11, [sp] +; NONEON-NOSVE-NEXT: strb w8, [x1, #3] +; NONEON-NOSVE-NEXT: strb w9, [x1, #2] +; NONEON-NOSVE-NEXT: strb w11, [x1, #1] +; NONEON-NOSVE-NEXT: strb w10, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %a = load <4 x i32>, ptr %ap %val = trunc <4 x i32> %a to <4 x i8> store <4 x i8> %val, ptr %dest @@ -38,6 +79,22 @@ define void @store_trunc_v4i32i16(ptr %ap, ptr %dest) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: st1h { z0.s }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_trunc_v4i32i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w9, [sp, #30] +; NONEON-NOSVE-NEXT: strh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] +; NONEON-NOSVE-NEXT: strh w9, [sp, #26] +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %a = load <4 x i32>, ptr %ap %val = trunc <4 x i32> %a to <4 x i16> store <4 x i16> %val, ptr %dest @@ -51,6 +108,18 @@ define void @store_trunc_v2i64i8(ptr %ap, ptr %dest) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: st1w { z0.d }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_trunc_v2i64i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %a = load <2 x i64>, ptr %ap %val = trunc <2 x i64> %a to <2 x i32> store <2 x i32> %val, ptr %dest @@ -66,6 +135,19 @@ define void @store_trunc_v2i256i64(ptr %ap, ptr %dest) { ; CHECK-NEXT: splice z1.d, p0, z1.d, z0.d ; CHECK-NEXT: str q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_trunc_v2i256i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr x8, [x0, #32] +; NONEON-NOSVE-NEXT: ldr x9, [x0] +; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %a = load <2 x i256>, ptr %ap %val = trunc <2 x i256> %a to <2 x i64> store <2 x i64> %val, ptr %dest diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll index 9e56462df3889..bc046059f0bd5 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -19,6 +20,46 @@ define <16 x i8> @trunc_v16i16_v16i8(ptr %in) nounwind { ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; 
NONEON-NOSVE-LABEL: trunc_v16i16_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] +; NONEON-NOSVE-NEXT: strb w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] +; NONEON-NOSVE-NEXT: strb w8, [sp, #43] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] +; NONEON-NOSVE-NEXT: strb w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: strb w8, [sp, #39] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: strb w8, [sp, #38] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] +; NONEON-NOSVE-NEXT: strb w8, [sp, #37] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: strb w8, [sp, #35] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: strb w8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] +; NONEON-NOSVE-NEXT: strb w8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %a = load <16 x i16>, ptr %in %b = trunc <16 x i16> %a to <16 x i8> ret <16 x i8> %b @@ -41,6 +82,129 @@ define void @trunc_v32i16_v32i8(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: add z1.b, z2.b, z2.b ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v32i16_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #208 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32] +; NONEON-NOSVE-NEXT: stp x29, 
x30, [sp, #112] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #128] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #144] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #160] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #16] +; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w25, [sp, #28] +; NONEON-NOSVE-NEXT: ldrh w26, [sp, #30] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #64] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w29, [sp, #52] +; NONEON-NOSVE-NEXT: ldrh w27, [sp, #48] +; NONEON-NOSVE-NEXT: ldrh w28, [sp, #50] +; NONEON-NOSVE-NEXT: ldrh w23, [sp, #24] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #54] +; NONEON-NOSVE-NEXT: ldrh w24, [sp, #26] +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #176] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w21, [sp, #20] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: ldrh w22, [sp, #22] +; NONEON-NOSVE-NEXT: strb w8, [sp, #92] +; NONEON-NOSVE-NEXT: add w8, w29, w29 +; NONEON-NOSVE-NEXT: ldrh w4, [sp, #44] +; NONEON-NOSVE-NEXT: strb w9, [sp, #91] +; NONEON-NOSVE-NEXT: add w9, w28, w28 +; NONEON-NOSVE-NEXT: ldrh w7, [sp, #46] +; NONEON-NOSVE-NEXT: strb w8, [sp, #90] +; NONEON-NOSVE-NEXT: add w8, w27, w27 +; NONEON-NOSVE-NEXT: ldrh w2, [sp, #40] +; NONEON-NOSVE-NEXT: strb w9, [sp, #89] +; NONEON-NOSVE-NEXT: add w9, w26, w26 +; NONEON-NOSVE-NEXT: ldrh w3, [sp, #42] +; NONEON-NOSVE-NEXT: strb w8, [sp, #88] +; NONEON-NOSVE-NEXT: add w8, w25, w25 +; NONEON-NOSVE-NEXT: ldrh w18, [sp, #36] +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #192] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w19, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w20, [sp, #18] +; NONEON-NOSVE-NEXT: strb w9, [sp, #87] +; NONEON-NOSVE-NEXT: add w9, w24, w24 +; NONEON-NOSVE-NEXT: 
ldrh w0, [sp, #38] +; NONEON-NOSVE-NEXT: strb w8, [sp, #86] +; NONEON-NOSVE-NEXT: add w8, w23, w23 +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #60] +; NONEON-NOSVE-NEXT: strb w9, [sp, #85] +; NONEON-NOSVE-NEXT: add w9, w22, w22 +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #62] +; NONEON-NOSVE-NEXT: add w6, w12, w12 +; NONEON-NOSVE-NEXT: strb w8, [sp, #84] +; NONEON-NOSVE-NEXT: add w8, w21, w21 +; NONEON-NOSVE-NEXT: add w5, w13, w13 +; NONEON-NOSVE-NEXT: strb w9, [sp, #83] +; NONEON-NOSVE-NEXT: add w9, w20, w20 +; NONEON-NOSVE-NEXT: strb w8, [sp, #82] +; NONEON-NOSVE-NEXT: add w8, w19, w19 +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #32] +; NONEON-NOSVE-NEXT: strb w9, [sp, #81] +; NONEON-NOSVE-NEXT: add w9, w7, w7 +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #34] +; NONEON-NOSVE-NEXT: strb w8, [sp, #80] +; NONEON-NOSVE-NEXT: add w8, w4, w4 +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #76] +; NONEON-NOSVE-NEXT: strb w9, [sp, #111] +; NONEON-NOSVE-NEXT: add w9, w3, w3 +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #78] +; NONEON-NOSVE-NEXT: strb w8, [sp, #110] +; NONEON-NOSVE-NEXT: add w8, w2, w2 +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #72] +; NONEON-NOSVE-NEXT: strb w9, [sp, #109] +; NONEON-NOSVE-NEXT: add w9, w0, w0 +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #74] +; NONEON-NOSVE-NEXT: strb w8, [sp, #108] +; NONEON-NOSVE-NEXT: add w8, w18, w18 +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #68] +; NONEON-NOSVE-NEXT: strb w9, [sp, #107] +; NONEON-NOSVE-NEXT: add w9, w17, w17 +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #70] +; NONEON-NOSVE-NEXT: strb w8, [sp, #106] +; NONEON-NOSVE-NEXT: add w8, w16, w16 +; NONEON-NOSVE-NEXT: ldrh w30, [sp, #58] +; NONEON-NOSVE-NEXT: strb w9, [sp, #105] +; NONEON-NOSVE-NEXT: add w9, w15, w15 +; NONEON-NOSVE-NEXT: strb w8, [sp, #104] +; NONEON-NOSVE-NEXT: add w8, w14, w14 +; NONEON-NOSVE-NEXT: strb w9, [sp, #103] +; NONEON-NOSVE-NEXT: add w9, w13, w13 +; NONEON-NOSVE-NEXT: strb w8, [sp, #102] +; NONEON-NOSVE-NEXT: add w8, w12, w12 +; NONEON-NOSVE-NEXT: strb w9, [sp, #101] +; NONEON-NOSVE-NEXT: add w9, w11, w11 
+; NONEON-NOSVE-NEXT: strb w8, [sp, #100] +; NONEON-NOSVE-NEXT: add w8, w10, w10 +; NONEON-NOSVE-NEXT: strb w9, [sp, #99] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #98] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w5, [sp, #95] +; NONEON-NOSVE-NEXT: add w5, w30, w30 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w6, [sp, #94] +; NONEON-NOSVE-NEXT: strb w5, [sp, #93] +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #192] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #97] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #176] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #96] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #160] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #80] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #144] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #128] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #112] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #208 +; NONEON-NOSVE-NEXT: ret %a = load <32 x i16>, ptr %in %b = trunc <32 x i16> %a to <32 x i8> %c = add <32 x i8> %b, %b @@ -76,6 +240,280 @@ define void @trunc_v64i16_v64i8(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: stp q0, q1, [x1, #32] ; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v64i16_v64i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #448 +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32] +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #416] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0] +; NONEON-NOSVE-NEXT: str x1, [sp, #152] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #432] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64] +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #400] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q7, q6, 
[x0, #96] +; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #224] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #238] +; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #256] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #232] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #272] +; NONEON-NOSVE-NEXT: stp q5, q7, [sp, #160] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #230] +; NONEON-NOSVE-NEXT: add w21, w8, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #274] +; NONEON-NOSVE-NEXT: stp q6, q0, [sp, #192] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #228] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #226] +; NONEON-NOSVE-NEXT: ldrh w14, [sp, #224] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #276] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #278] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #270] +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #268] +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #266] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #280] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #282] +; NONEON-NOSVE-NEXT: ldrh w18, [sp, #264] +; NONEON-NOSVE-NEXT: ldrh w0, [sp, #262] +; NONEON-NOSVE-NEXT: ldrh w1, [sp, #260] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #284] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #286] +; NONEON-NOSVE-NEXT: ldrh w2, [sp, #258] +; NONEON-NOSVE-NEXT: ldrh w3, [sp, #256] +; NONEON-NOSVE-NEXT: ldrh w4, [sp, #254] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #208] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #210] +; NONEON-NOSVE-NEXT: ldrh w5, [sp, #252] +; NONEON-NOSVE-NEXT: ldrh w6, [sp, #250] +; NONEON-NOSVE-NEXT: ldrh w7, [sp, #248] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #212] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #214] +; NONEON-NOSVE-NEXT: ldrh w19, [sp, #246] +; NONEON-NOSVE-NEXT: ldrh w20, [sp, #244] +; NONEON-NOSVE-NEXT: ldrh w22, [sp, #242] +; NONEON-NOSVE-NEXT: stp w8, w9, 
[sp, #104] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #216] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #218] +; NONEON-NOSVE-NEXT: ldrh w23, [sp, #240] +; NONEON-NOSVE-NEXT: ldrh w24, [sp, #174] +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #384] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #220] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #222] +; NONEON-NOSVE-NEXT: ldrh w25, [sp, #172] +; NONEON-NOSVE-NEXT: ldrh w26, [sp, #170] +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #368] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #176] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #178] +; NONEON-NOSVE-NEXT: ldrh w27, [sp, #168] +; NONEON-NOSVE-NEXT: ldrh w28, [sp, #166] +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #352] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #180] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #182] +; NONEON-NOSVE-NEXT: ldrh w29, [sp, #164] +; NONEON-NOSVE-NEXT: ldrh w30, [sp, #162] +; NONEON-NOSVE-NEXT: strb w21, [sp, #335] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #184] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #186] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #188] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #190] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #192] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #194] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #196] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #198] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #200] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #202] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill +; 
NONEON-NOSVE-NEXT: ldrh w9, [sp, #204] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #206] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #160] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #236] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: str w8, [sp, #20] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #234] +; NONEON-NOSVE-NEXT: strb w9, [sp, #334] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #333] +; NONEON-NOSVE-NEXT: add w8, w10, w10 +; NONEON-NOSVE-NEXT: strb w8, [sp, #332] +; NONEON-NOSVE-NEXT: add w8, w11, w11 +; NONEON-NOSVE-NEXT: strb w8, [sp, #331] +; NONEON-NOSVE-NEXT: add w8, w12, w12 +; NONEON-NOSVE-NEXT: strb w8, [sp, #330] +; NONEON-NOSVE-NEXT: add w8, w13, w13 +; NONEON-NOSVE-NEXT: strb w8, [sp, #329] +; NONEON-NOSVE-NEXT: add w8, w14, w14 +; NONEON-NOSVE-NEXT: strb w8, [sp, #328] +; NONEON-NOSVE-NEXT: add w8, w15, w15 +; NONEON-NOSVE-NEXT: strb w8, [sp, #327] +; NONEON-NOSVE-NEXT: add w8, w16, w16 +; NONEON-NOSVE-NEXT: strb w8, [sp, #326] +; NONEON-NOSVE-NEXT: add w8, w17, w17 +; NONEON-NOSVE-NEXT: strb w8, [sp, #325] +; NONEON-NOSVE-NEXT: add w8, w18, w18 +; NONEON-NOSVE-NEXT: strb w8, [sp, #324] +; NONEON-NOSVE-NEXT: add w8, w0, w0 +; NONEON-NOSVE-NEXT: strb w8, [sp, #323] +; NONEON-NOSVE-NEXT: add w8, w1, w1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #322] +; NONEON-NOSVE-NEXT: add w8, w2, w2 +; NONEON-NOSVE-NEXT: strb w8, [sp, #321] +; NONEON-NOSVE-NEXT: add w8, w3, w3 +; NONEON-NOSVE-NEXT: strb w8, [sp, #320] +; NONEON-NOSVE-NEXT: add w8, w4, w4 +; NONEON-NOSVE-NEXT: strb w8, [sp, #319] +; NONEON-NOSVE-NEXT: add w8, w5, w5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #318] +; NONEON-NOSVE-NEXT: add w8, w6, w6 +; NONEON-NOSVE-NEXT: strb w8, [sp, #317] +; NONEON-NOSVE-NEXT: add w8, w7, w7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #316] +; NONEON-NOSVE-NEXT: add w8, w19, w19 +; NONEON-NOSVE-NEXT: strb w8, [sp, #315] +; NONEON-NOSVE-NEXT: add w8, w20, w20 +; NONEON-NOSVE-NEXT: 
strb w8, [sp, #314] +; NONEON-NOSVE-NEXT: add w8, w22, w22 +; NONEON-NOSVE-NEXT: strb w8, [sp, #313] +; NONEON-NOSVE-NEXT: add w8, w23, w23 +; NONEON-NOSVE-NEXT: strb w8, [sp, #312] +; NONEON-NOSVE-NEXT: add w8, w24, w24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #311] +; NONEON-NOSVE-NEXT: add w8, w25, w25 +; NONEON-NOSVE-NEXT: strb w8, [sp, #310] +; NONEON-NOSVE-NEXT: add w8, w26, w26 +; NONEON-NOSVE-NEXT: strb w8, [sp, #309] +; NONEON-NOSVE-NEXT: add w8, w27, w27 +; NONEON-NOSVE-NEXT: strb w8, [sp, #308] +; NONEON-NOSVE-NEXT: add w8, w28, w28 +; NONEON-NOSVE-NEXT: strb w8, [sp, #307] +; NONEON-NOSVE-NEXT: add w8, w29, w29 +; NONEON-NOSVE-NEXT: strb w8, [sp, #306] +; NONEON-NOSVE-NEXT: add w8, w30, w30 +; NONEON-NOSVE-NEXT: strb w8, [sp, #305] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #432] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #416] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #304] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #400] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #384] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #303] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #368] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #352] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #302] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #301] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #300] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #299] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #298] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #297] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #296] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #295] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #294] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #293] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #292] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #291] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #290] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #289] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #288] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q1, q3, [sp, #288] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #351] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #350] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] // 4-byte Folded 
Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #349] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #100] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #348] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #347] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #346] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #345] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #116] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #344] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #343] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #124] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #342] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #341] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #340] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #339] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #338] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #144] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #337] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #148] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #336] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #152] 
// 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q2, q0, [sp, #320] +; NONEON-NOSVE-NEXT: stp q3, q2, [x8] +; NONEON-NOSVE-NEXT: stp q0, q1, [x8, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #448 +; NONEON-NOSVE-NEXT: ret %a = load <64 x i16>, ptr %in %b = trunc <64 x i16> %a to <64 x i8> %c = add <64 x i8> %b, %b @@ -133,6 +571,602 @@ define void @trunc_v128i16_v128i8(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: stp q2, q3, [x1, #32] ; CHECK-NEXT: stp q4, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v128i16_v128i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: sub sp, sp, #800 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: str x1, [sp, #408] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #96] +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #64] +; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #192] +; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #160] +; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #128] +; NONEON-NOSVE-NEXT: ldp q23, q22, [x0, #224] +; NONEON-NOSVE-NEXT: str q0, [sp, #592] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #606] +; NONEON-NOSVE-NEXT: str q19, [sp, #496] +; NONEON-NOSVE-NEXT: ldrh w10, [sp, #600] +; NONEON-NOSVE-NEXT: stp q18, q20, [sp, #512] +; NONEON-NOSVE-NEXT: ldrh w11, [sp, #598] +; NONEON-NOSVE-NEXT: ldrh w12, [sp, #596] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: stp q17, q23, [sp, #432] +; NONEON-NOSVE-NEXT: ldrh w13, [sp, #594] +; NONEON-NOSVE-NEXT: str w8, [sp, #64] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #432] +; NONEON-NOSVE-NEXT: 
ldrh w14, [sp, #592] +; NONEON-NOSVE-NEXT: stp q22, q16, [sp, #464] +; NONEON-NOSVE-NEXT: ldr w30, [sp, #64] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: str w8, [sp, #404] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #434] +; NONEON-NOSVE-NEXT: stp q4, q6, [sp, #560] +; NONEON-NOSVE-NEXT: str w8, [sp, #400] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #436] +; NONEON-NOSVE-NEXT: str q5, [sp, #544] +; NONEON-NOSVE-NEXT: str w8, [sp, #396] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #438] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #608] +; NONEON-NOSVE-NEXT: str w8, [sp, #392] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #440] +; NONEON-NOSVE-NEXT: ldrh w15, [sp, #638] +; NONEON-NOSVE-NEXT: stp q7, q21, [sp, #640] +; NONEON-NOSVE-NEXT: ldrh w16, [sp, #636] +; NONEON-NOSVE-NEXT: ldrh w17, [sp, #634] +; NONEON-NOSVE-NEXT: str w8, [sp, #388] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #442] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #666] +; NONEON-NOSVE-NEXT: str q3, [sp, #416] +; NONEON-NOSVE-NEXT: ldrh w18, [sp, #632] +; NONEON-NOSVE-NEXT: ldrh w0, [sp, #630] +; NONEON-NOSVE-NEXT: str w8, [sp, #384] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #444] +; NONEON-NOSVE-NEXT: ldrh w1, [sp, #628] +; NONEON-NOSVE-NEXT: ldrh w2, [sp, #626] +; NONEON-NOSVE-NEXT: ldrh w3, [sp, #624] +; NONEON-NOSVE-NEXT: ldrh w4, [sp, #622] +; NONEON-NOSVE-NEXT: str w8, [sp, #380] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #446] +; NONEON-NOSVE-NEXT: ldrh w5, [sp, #620] +; NONEON-NOSVE-NEXT: ldrh w6, [sp, #618] +; NONEON-NOSVE-NEXT: ldrh w7, [sp, #616] +; NONEON-NOSVE-NEXT: ldrh w19, [sp, #614] +; NONEON-NOSVE-NEXT: str w8, [sp, #376] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #480] +; NONEON-NOSVE-NEXT: ldrh w20, [sp, #612] +; NONEON-NOSVE-NEXT: ldrh w21, [sp, #610] +; NONEON-NOSVE-NEXT: ldrh w22, [sp, #608] +; NONEON-NOSVE-NEXT: ldrh w23, [sp, #430] +; NONEON-NOSVE-NEXT: str w8, [sp, 
#372] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #482] +; NONEON-NOSVE-NEXT: ldrh w24, [sp, #428] +; NONEON-NOSVE-NEXT: ldrh w25, [sp, #426] +; NONEON-NOSVE-NEXT: ldrh w26, [sp, #424] +; NONEON-NOSVE-NEXT: ldrh w27, [sp, #422] +; NONEON-NOSVE-NEXT: str w8, [sp, #368] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #484] +; NONEON-NOSVE-NEXT: ldrh w28, [sp, #420] +; NONEON-NOSVE-NEXT: ldrh w29, [sp, #418] +; NONEON-NOSVE-NEXT: strb w30, [sp, #767] +; NONEON-NOSVE-NEXT: str w8, [sp, #364] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #486] +; NONEON-NOSVE-NEXT: str w8, [sp, #360] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #488] +; NONEON-NOSVE-NEXT: str w8, [sp, #356] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #490] +; NONEON-NOSVE-NEXT: str w8, [sp, #352] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #492] +; NONEON-NOSVE-NEXT: str w8, [sp, #348] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #494] +; NONEON-NOSVE-NEXT: str w8, [sp, #344] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #448] +; NONEON-NOSVE-NEXT: str w8, [sp, #340] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #450] +; NONEON-NOSVE-NEXT: str w8, [sp, #336] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #452] +; NONEON-NOSVE-NEXT: str w8, [sp, #332] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #454] +; NONEON-NOSVE-NEXT: str w8, [sp, #328] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #456] +; NONEON-NOSVE-NEXT: str w8, [sp, #324] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #458] +; NONEON-NOSVE-NEXT: str w8, [sp, #320] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #460] +; NONEON-NOSVE-NEXT: str w8, [sp, #316] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #462] +; NONEON-NOSVE-NEXT: str w8, [sp, #312] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #464] +; NONEON-NOSVE-NEXT: str w8, [sp, 
#308] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #466] +; NONEON-NOSVE-NEXT: str w8, [sp, #304] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #468] +; NONEON-NOSVE-NEXT: str w8, [sp, #300] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #470] +; NONEON-NOSVE-NEXT: str w8, [sp, #296] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #472] +; NONEON-NOSVE-NEXT: str w8, [sp, #292] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #474] +; NONEON-NOSVE-NEXT: str w8, [sp, #288] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #476] +; NONEON-NOSVE-NEXT: str w8, [sp, #284] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #478] +; NONEON-NOSVE-NEXT: str w8, [sp, #280] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #656] +; NONEON-NOSVE-NEXT: str w8, [sp, #276] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #658] +; NONEON-NOSVE-NEXT: str w8, [sp, #272] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #660] +; NONEON-NOSVE-NEXT: str w8, [sp, #268] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #662] +; NONEON-NOSVE-NEXT: str w8, [sp, #264] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #664] +; NONEON-NOSVE-NEXT: str w8, [sp, #260] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #668] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #252] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #670] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #528] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #244] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #530] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #532] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #236] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #534] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #536] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #228] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #538] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #540] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #220] // 
8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #542] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #496] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #212] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #498] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #500] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #204] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #502] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #504] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #196] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #506] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #508] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #188] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #510] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #512] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #180] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #514] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #516] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #172] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #518] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #520] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #164] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #522] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #524] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #156] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #526] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #640] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #148] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #642] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #644] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #140] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #646] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #648] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #132] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #650] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #652] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #124] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #654] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #576] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #116] // 8-byte Folded Spill +; 
NONEON-NOSVE-NEXT: ldrh w9, [sp, #578] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #580] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #108] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #582] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #584] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #100] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #586] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #588] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #92] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #590] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #544] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #84] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #546] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #548] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #76] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #550] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #552] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #68] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #554] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #556] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #558] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #560] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #562] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #564] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #566] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #568] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #570] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #572] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #574] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #416] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #602] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #604] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: strb w8, 
[sp, #765] +; NONEON-NOSVE-NEXT: add w8, w10, w10 +; NONEON-NOSVE-NEXT: strb w8, [sp, #764] +; NONEON-NOSVE-NEXT: add w8, w11, w11 +; NONEON-NOSVE-NEXT: strb w8, [sp, #763] +; NONEON-NOSVE-NEXT: add w8, w12, w12 +; NONEON-NOSVE-NEXT: strb w8, [sp, #762] +; NONEON-NOSVE-NEXT: add w8, w13, w13 +; NONEON-NOSVE-NEXT: strb w8, [sp, #761] +; NONEON-NOSVE-NEXT: add w8, w14, w14 +; NONEON-NOSVE-NEXT: strb w8, [sp, #760] +; NONEON-NOSVE-NEXT: add w8, w15, w15 +; NONEON-NOSVE-NEXT: strb w8, [sp, #759] +; NONEON-NOSVE-NEXT: add w8, w16, w16 +; NONEON-NOSVE-NEXT: strb w8, [sp, #758] +; NONEON-NOSVE-NEXT: add w8, w17, w17 +; NONEON-NOSVE-NEXT: strb w8, [sp, #757] +; NONEON-NOSVE-NEXT: add w8, w18, w18 +; NONEON-NOSVE-NEXT: strb w8, [sp, #756] +; NONEON-NOSVE-NEXT: add w8, w0, w0 +; NONEON-NOSVE-NEXT: strb w8, [sp, #755] +; NONEON-NOSVE-NEXT: add w8, w1, w1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #754] +; NONEON-NOSVE-NEXT: add w8, w2, w2 +; NONEON-NOSVE-NEXT: strb w8, [sp, #753] +; NONEON-NOSVE-NEXT: add w8, w3, w3 +; NONEON-NOSVE-NEXT: strb w8, [sp, #752] +; NONEON-NOSVE-NEXT: add w8, w4, w4 +; NONEON-NOSVE-NEXT: strb w8, [sp, #751] +; NONEON-NOSVE-NEXT: add w8, w5, w5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #750] +; NONEON-NOSVE-NEXT: add w8, w6, w6 +; NONEON-NOSVE-NEXT: strb w8, [sp, #749] +; NONEON-NOSVE-NEXT: add w8, w7, w7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #748] +; NONEON-NOSVE-NEXT: add w8, w19, w19 +; NONEON-NOSVE-NEXT: strb w8, [sp, #747] +; NONEON-NOSVE-NEXT: add w8, w20, w20 +; NONEON-NOSVE-NEXT: strb w8, [sp, #746] +; NONEON-NOSVE-NEXT: add w8, w21, w21 +; NONEON-NOSVE-NEXT: strb w8, [sp, #745] +; NONEON-NOSVE-NEXT: add w8, w22, w22 +; NONEON-NOSVE-NEXT: strb w8, [sp, #744] +; NONEON-NOSVE-NEXT: add w8, w23, w23 +; NONEON-NOSVE-NEXT: strb w8, [sp, #743] +; NONEON-NOSVE-NEXT: add w8, w24, w24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #742] +; NONEON-NOSVE-NEXT: add w8, w25, w25 +; NONEON-NOSVE-NEXT: strb w8, [sp, #741] +; NONEON-NOSVE-NEXT: add w8, w26, w26 +; NONEON-NOSVE-NEXT: 
strb w8, [sp, #740] +; NONEON-NOSVE-NEXT: add w8, w27, w27 +; NONEON-NOSVE-NEXT: strb w8, [sp, #739] +; NONEON-NOSVE-NEXT: add w8, w28, w28 +; NONEON-NOSVE-NEXT: strb w8, [sp, #738] +; NONEON-NOSVE-NEXT: add w8, w29, w29 +; NONEON-NOSVE-NEXT: strb w8, [sp, #737] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #766] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #736] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #736] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #735] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #734] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #733] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #732] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #731] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #730] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #729] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #728] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #727] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #726] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] // 4-byte Folded Reload +; 
NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #725] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #724] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #723] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #722] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #721] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #720] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #783] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #782] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #781] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #100] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #780] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #779] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #778] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #777] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #116] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #776] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] // 4-byte Folded 
Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #775] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #124] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #774] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #773] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #772] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #771] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #770] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #144] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #769] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #148] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #768] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #152] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #719] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #156] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #718] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #160] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #717] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #164] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #716] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #168] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #715] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #172] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #714] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #176] 
// 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #713] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #180] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #712] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #184] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #711] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #188] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #710] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #192] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #709] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #196] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #708] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #200] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #707] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #204] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #706] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #208] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #705] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #212] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #704] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #216] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q6, q3, [sp, #704] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #799] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #220] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #798] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #224] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #797] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #228] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: 
strb w8, [sp, #796] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #232] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #795] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #236] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #794] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #240] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #793] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #244] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #792] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #248] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #791] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #252] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #790] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #256] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #789] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #260] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #788] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #264] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #787] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #268] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #786] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #272] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #785] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #276] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #784] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #280] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q4, q7, [sp, #768] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #687] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #284] // 4-byte Folded 
Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #686] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #288] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #685] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #292] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #684] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #296] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #683] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #300] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #682] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #304] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #681] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #308] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #680] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #312] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #679] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #316] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #678] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #320] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #677] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #324] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #676] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #328] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #675] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #332] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #674] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #336] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #673] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #340] 
// 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #672] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #344] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #703] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #348] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #702] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #352] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #701] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #356] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #700] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #360] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #699] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #364] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #698] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #368] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #697] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #372] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #696] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #376] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #695] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #380] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #694] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #384] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #693] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #388] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #692] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #392] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #691] +; NONEON-NOSVE-NEXT: 
ldr w8, [sp, #396] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #690] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #400] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #689] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #404] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #688] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #408] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #672] +; NONEON-NOSVE-NEXT: stp q1, q0, [x8] +; NONEON-NOSVE-NEXT: stp q4, q3, [x8, #32] +; NONEON-NOSVE-NEXT: stp q7, q6, [x8, #64] +; NONEON-NOSVE-NEXT: stp q2, q5, [x8, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #800 +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ret %a = load <128 x i16>, ptr %in %b = trunc <128 x i16> %a to <128 x i8> %c = add <128 x i8> %b, %b @@ -155,6 +1189,26 @@ define <8 x i8> @trunc_v8i32_v8i8(ptr %in) nounwind { ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v8i32_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: strb w9, [sp, #47] +; NONEON-NOSVE-NEXT: strb w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: strb w9, [sp, #45] +; NONEON-NOSVE-NEXT: strb w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: strb w9, [sp, #43] +; NONEON-NOSVE-NEXT: strb w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] +; NONEON-NOSVE-NEXT: strb w9, [sp, #41] +; NONEON-NOSVE-NEXT: strb w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %a = load <8 x i32>, ptr %in %b = trunc <8 x i32> %a to <8 x i8> ret <8 x i8> %b @@ -178,6 +1232,42 @@ define <16 x i8> @trunc_v16i32_v16i8(ptr %in) nounwind { ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v16i32_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: str q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: str q2, [sp] +; NONEON-NOSVE-NEXT: strb w9, [sp, #79] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: strb w9, [sp, #77] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: strb w9, [sp, #75] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #48] +; NONEON-NOSVE-NEXT: strb w9, [sp, #73] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: strb w9, [sp, #71] +; NONEON-NOSVE-NEXT: strb w8, [sp, #70] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32] +; NONEON-NOSVE-NEXT: strb w9, [sp, #69] +; NONEON-NOSVE-NEXT: strb w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] +; 
NONEON-NOSVE-NEXT: strb w9, [sp, #67] +; NONEON-NOSVE-NEXT: strb w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] +; NONEON-NOSVE-NEXT: strb w9, [sp, #65] +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #64] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %a = load <16 x i32>, ptr %in %b = trunc <16 x i32> %a to <16 x i8> ret <16 x i8> %b @@ -215,6 +1305,117 @@ define void @trunc_v32i32_v32i8(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: add z1.b, z3.b, z3.b ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v32i32_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #272 +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32] +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #192] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0] +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #208] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #224] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64] +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #240] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #96] +; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #80] +; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #112] +; NONEON-NOSVE-NEXT: stp q5, q7, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: ldp w27, w28, [sp, #112] +; NONEON-NOSVE-NEXT: ldp w25, w26, [sp, #104] +; NONEON-NOSVE-NEXT: add w6, w8, w8 +; NONEON-NOSVE-NEXT: add w5, w9, w9 +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #256] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w10, w8, [sp, #128] +; NONEON-NOSVE-NEXT: ldp w23, w24, [sp, #96] +; NONEON-NOSVE-NEXT: ldp w21, w22, [sp, #24] +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #8] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #120] +; NONEON-NOSVE-NEXT: stp q6, q0, [sp, #48] +; NONEON-NOSVE-NEXT: ldp w19, w20, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, 
#176] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: strb w8, [sp, #155] +; NONEON-NOSVE-NEXT: add w8, w28, w28 +; NONEON-NOSVE-NEXT: strb w9, [sp, #154] +; NONEON-NOSVE-NEXT: add w9, w27, w27 +; NONEON-NOSVE-NEXT: strb w8, [sp, #153] +; NONEON-NOSVE-NEXT: add w8, w26, w26 +; NONEON-NOSVE-NEXT: strb w9, [sp, #152] +; NONEON-NOSVE-NEXT: add w9, w25, w25 +; NONEON-NOSVE-NEXT: ldp w4, w7, [sp, #56] +; NONEON-NOSVE-NEXT: strb w8, [sp, #151] +; NONEON-NOSVE-NEXT: add w8, w24, w24 +; NONEON-NOSVE-NEXT: strb w9, [sp, #150] +; NONEON-NOSVE-NEXT: add w9, w23, w23 +; NONEON-NOSVE-NEXT: ldp w2, w3, [sp, #48] +; NONEON-NOSVE-NEXT: strb w8, [sp, #149] +; NONEON-NOSVE-NEXT: add w8, w22, w22 +; NONEON-NOSVE-NEXT: strb w9, [sp, #148] +; NONEON-NOSVE-NEXT: add w9, w21, w21 +; NONEON-NOSVE-NEXT: ldp w18, w0, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #147] +; NONEON-NOSVE-NEXT: add w8, w20, w20 +; NONEON-NOSVE-NEXT: strb w9, [sp, #146] +; NONEON-NOSVE-NEXT: add w9, w19, w19 +; NONEON-NOSVE-NEXT: ldp w16, w17, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #145] +; NONEON-NOSVE-NEXT: add w8, w7, w7 +; NONEON-NOSVE-NEXT: strb w9, [sp, #144] +; NONEON-NOSVE-NEXT: add w9, w4, w4 +; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #72] +; NONEON-NOSVE-NEXT: strb w8, [sp, #175] +; NONEON-NOSVE-NEXT: add w8, w3, w3 +; NONEON-NOSVE-NEXT: strb w9, [sp, #174] +; NONEON-NOSVE-NEXT: add w9, w2, w2 +; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #64] +; NONEON-NOSVE-NEXT: strb w8, [sp, #173] +; NONEON-NOSVE-NEXT: add w8, w0, w0 +; NONEON-NOSVE-NEXT: strb w9, [sp, #172] +; NONEON-NOSVE-NEXT: add w9, w18, w18 +; NONEON-NOSVE-NEXT: ldp w10, w11, [sp, #136] +; NONEON-NOSVE-NEXT: strb w8, [sp, #171] +; NONEON-NOSVE-NEXT: add w8, w17, w17 +; NONEON-NOSVE-NEXT: strb w9, [sp, #170] +; NONEON-NOSVE-NEXT: add w9, w16, w16 +; NONEON-NOSVE-NEXT: strb w8, [sp, #169] +; NONEON-NOSVE-NEXT: add w8, w15, w15 +; NONEON-NOSVE-NEXT: strb w9, [sp, #168] +; NONEON-NOSVE-NEXT: add w9, w14, w14 +; NONEON-NOSVE-NEXT: strb w8, [sp, #167] 
+; NONEON-NOSVE-NEXT: add w8, w13, w13 +; NONEON-NOSVE-NEXT: strb w9, [sp, #166] +; NONEON-NOSVE-NEXT: add w9, w12, w12 +; NONEON-NOSVE-NEXT: ldp w29, w30, [sp, #80] +; NONEON-NOSVE-NEXT: strb w8, [sp, #165] +; NONEON-NOSVE-NEXT: add w8, w11, w11 +; NONEON-NOSVE-NEXT: strb w9, [sp, #164] +; NONEON-NOSVE-NEXT: add w9, w10, w10 +; NONEON-NOSVE-NEXT: strb w8, [sp, #163] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #162] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w5, [sp, #159] +; NONEON-NOSVE-NEXT: add w5, w30, w30 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w6, [sp, #158] +; NONEON-NOSVE-NEXT: add w6, w29, w29 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: strb w5, [sp, #157] +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #256] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w6, [sp, #156] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #240] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #161] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #224] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #160] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #208] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #144] +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #192] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #176] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #272 +; NONEON-NOSVE-NEXT: ret %a = load <32 x i32>, ptr %in %b = trunc <32 x i32> %a to <32 x i8> %c = add <32 x i8> %b, %b @@ -279,6 +1480,277 @@ define void @trunc_v64i32_v64i8(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: stp q1, q2, [x1, #32] ; CHECK-NEXT: stp q3, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v64i32_v64i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #-96]! 
// 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: sub sp, sp, #480 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #96] +; NONEON-NOSVE-NEXT: str x1, [sp, #152] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #64] +; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #128] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0] +; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #224] +; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #192] +; NONEON-NOSVE-NEXT: ldp q23, q22, [x0, #160] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #288] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #316] +; NONEON-NOSVE-NEXT: str q18, [sp, #208] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #304] +; NONEON-NOSVE-NEXT: stp q21, q19, [sp, #176] +; NONEON-NOSVE-NEXT: ldr w11, [sp, #296] +; NONEON-NOSVE-NEXT: ldr w12, [sp, #292] +; NONEON-NOSVE-NEXT: add w20, w8, w8 +; NONEON-NOSVE-NEXT: stp q20, q23, [sp, #224] +; NONEON-NOSVE-NEXT: ldr w13, [sp, #288] +; NONEON-NOSVE-NEXT: stp q22, q16, [sp, #256] +; NONEON-NOSVE-NEXT: ldr w22, [sp, #312] +; NONEON-NOSVE-NEXT: stp q3, q17, [sp, #384] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #400] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #404] +; NONEON-NOSVE-NEXT: str q7, [sp, #160] +; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #320] +; NONEON-NOSVE-NEXT: ldr w18, [sp, #396] +; NONEON-NOSVE-NEXT: ldr w0, [sp, #392] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w9, [sp, #408] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #412] +; NONEON-NOSVE-NEXT: ldr w14, [sp, #332] +; NONEON-NOSVE-NEXT: ldr w15, [sp, #328] +; NONEON-NOSVE-NEXT: ldr w16, [sp, #324] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] // 8-byte Folded Spill +; 
NONEON-NOSVE-NEXT: ldr w9, [sp, #272] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #276] +; NONEON-NOSVE-NEXT: ldr w17, [sp, #320] +; NONEON-NOSVE-NEXT: ldr w1, [sp, #388] +; NONEON-NOSVE-NEXT: ldr w2, [sp, #384] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w9, [sp, #280] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #284] +; NONEON-NOSVE-NEXT: ldr w3, [sp, #348] +; NONEON-NOSVE-NEXT: ldr w4, [sp, #344] +; NONEON-NOSVE-NEXT: ldr w5, [sp, #340] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w6, [sp, #336] +; NONEON-NOSVE-NEXT: stp q6, q5, [sp, #352] +; NONEON-NOSVE-NEXT: ldr w7, [sp, #380] +; NONEON-NOSVE-NEXT: ldr w19, [sp, #376] +; NONEON-NOSVE-NEXT: ldr w21, [sp, #372] +; NONEON-NOSVE-NEXT: ldr w23, [sp, #368] +; NONEON-NOSVE-NEXT: ldr w24, [sp, #364] +; NONEON-NOSVE-NEXT: ldr w25, [sp, #360] +; NONEON-NOSVE-NEXT: ldr w26, [sp, #356] +; NONEON-NOSVE-NEXT: ldr w27, [sp, #352] +; NONEON-NOSVE-NEXT: strb w20, [sp, #463] +; NONEON-NOSVE-NEXT: add w20, w22, w22 +; NONEON-NOSVE-NEXT: strb w20, [sp, #462] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #240] +; NONEON-NOSVE-NEXT: ldp w29, w28, [sp, #168] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #248] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w9, [sp, #256] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #260] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w9, [sp, #264] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #268] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #176] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #184] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #224] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] // 8-byte Folded Spill 
+; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #232] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #192] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #200] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #208] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #216] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w9, [sp, #300] +; NONEON-NOSVE-NEXT: ldp w8, w30, [sp, #160] +; NONEON-NOSVE-NEXT: str w8, [sp, #20] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w8, [sp, #308] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #461] +; NONEON-NOSVE-NEXT: add w8, w10, w10 +; NONEON-NOSVE-NEXT: strb w8, [sp, #460] +; NONEON-NOSVE-NEXT: add w8, w9, w9 +; NONEON-NOSVE-NEXT: strb w8, [sp, #459] +; NONEON-NOSVE-NEXT: add w8, w11, w11 +; NONEON-NOSVE-NEXT: strb w8, [sp, #458] +; NONEON-NOSVE-NEXT: add w8, w12, w12 +; NONEON-NOSVE-NEXT: strb w8, [sp, #457] +; NONEON-NOSVE-NEXT: add w8, w13, w13 +; NONEON-NOSVE-NEXT: strb w8, [sp, #456] +; NONEON-NOSVE-NEXT: add w8, w14, w14 +; NONEON-NOSVE-NEXT: strb w8, [sp, #455] +; NONEON-NOSVE-NEXT: add w8, w15, w15 +; NONEON-NOSVE-NEXT: strb w8, [sp, #454] +; NONEON-NOSVE-NEXT: add w8, w16, w16 +; NONEON-NOSVE-NEXT: strb w8, [sp, #453] +; NONEON-NOSVE-NEXT: add w8, w17, w17 +; NONEON-NOSVE-NEXT: strb w8, [sp, #452] +; NONEON-NOSVE-NEXT: add w8, w18, w18 +; NONEON-NOSVE-NEXT: strb w8, [sp, #451] +; NONEON-NOSVE-NEXT: add w8, w0, w0 +; NONEON-NOSVE-NEXT: strb w8, [sp, #450] +; NONEON-NOSVE-NEXT: add w8, w1, w1 +; NONEON-NOSVE-NEXT: strb w8, [sp, #449] +; NONEON-NOSVE-NEXT: add w8, w2, w2 +; NONEON-NOSVE-NEXT: strb w8, [sp, #448] +; NONEON-NOSVE-NEXT: add w8, w3, w3 +; NONEON-NOSVE-NEXT: strb w8, [sp, #447] +; NONEON-NOSVE-NEXT: add w8, w4, w4 +; 
NONEON-NOSVE-NEXT: strb w8, [sp, #446] +; NONEON-NOSVE-NEXT: add w8, w5, w5 +; NONEON-NOSVE-NEXT: strb w8, [sp, #445] +; NONEON-NOSVE-NEXT: add w8, w6, w6 +; NONEON-NOSVE-NEXT: strb w8, [sp, #444] +; NONEON-NOSVE-NEXT: add w8, w7, w7 +; NONEON-NOSVE-NEXT: strb w8, [sp, #443] +; NONEON-NOSVE-NEXT: add w8, w19, w19 +; NONEON-NOSVE-NEXT: strb w8, [sp, #442] +; NONEON-NOSVE-NEXT: add w8, w21, w21 +; NONEON-NOSVE-NEXT: strb w8, [sp, #441] +; NONEON-NOSVE-NEXT: add w8, w23, w23 +; NONEON-NOSVE-NEXT: strb w8, [sp, #440] +; NONEON-NOSVE-NEXT: add w8, w24, w24 +; NONEON-NOSVE-NEXT: strb w8, [sp, #439] +; NONEON-NOSVE-NEXT: add w8, w25, w25 +; NONEON-NOSVE-NEXT: strb w8, [sp, #438] +; NONEON-NOSVE-NEXT: add w8, w26, w26 +; NONEON-NOSVE-NEXT: strb w8, [sp, #437] +; NONEON-NOSVE-NEXT: add w8, w27, w27 +; NONEON-NOSVE-NEXT: strb w8, [sp, #436] +; NONEON-NOSVE-NEXT: add w8, w28, w28 +; NONEON-NOSVE-NEXT: strb w8, [sp, #435] +; NONEON-NOSVE-NEXT: add w8, w29, w29 +; NONEON-NOSVE-NEXT: strb w8, [sp, #434] +; NONEON-NOSVE-NEXT: add w8, w30, w30 +; NONEON-NOSVE-NEXT: strb w8, [sp, #433] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #432] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #431] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #430] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #429] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #428] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #427] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] // 4-byte 
Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #426] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #425] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #424] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #423] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #422] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #421] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #420] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #419] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #418] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #417] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #416] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q1, q3, [sp, #416] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #479] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #478] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #477] +; 
NONEON-NOSVE-NEXT: ldr w8, [sp, #100] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #476] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #475] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #474] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #473] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #116] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #472] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #471] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #124] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #470] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #469] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #468] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #467] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #466] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #144] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #465] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #148] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w8, [sp, #464] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #152] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q2, q0, [sp, #448] +; 
NONEON-NOSVE-NEXT: stp q3, q2, [x8] +; NONEON-NOSVE-NEXT: stp q0, q1, [x8, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #480 +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ret %a = load <64 x i32>, ptr %in %b = trunc <64 x i32> %a to <64 x i8> %c = add <64 x i8> %b, %b @@ -300,6 +1772,26 @@ define <8 x i16> @trunc_v8i32_v8i16(ptr %in) nounwind { ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v8i32_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: strh w9, [sp, #46] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] +; NONEON-NOSVE-NEXT: strh w9, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: strh w9, [sp, #38] +; NONEON-NOSVE-NEXT: strh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] +; NONEON-NOSVE-NEXT: strh w9, [sp, #34] +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %a = load <8 x i32>, ptr %in %b = trunc <8 x i32> %a to <8 x i16> ret <8 x i16> %b @@ -322,6 +1814,58 @@ define void @trunc_v16i32_v16i16(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: add z1.h, z2.h, z2.h ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v16i32_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: stp q3, q1, [sp] +; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldp w2, w3, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w4, w5, [sp, #8] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: ldp w18, w0, [sp] +; NONEON-NOSVE-NEXT: ldp w16, w17, [sp, #24] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: add w8, w3, w3 +; NONEON-NOSVE-NEXT: strh w9, [sp, #76] +; NONEON-NOSVE-NEXT: add w9, w2, w2 +; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #74] +; NONEON-NOSVE-NEXT: add w8, w5, w5 +; NONEON-NOSVE-NEXT: strh w9, [sp, #72] +; NONEON-NOSVE-NEXT: add w9, w4, w4 +; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #56] +; NONEON-NOSVE-NEXT: strh w8, [sp, #70] +; NONEON-NOSVE-NEXT: add w8, w0, w0 +; NONEON-NOSVE-NEXT: strh w9, [sp, #68] +; NONEON-NOSVE-NEXT: add w9, w18, w18 +; 
NONEON-NOSVE-NEXT: ldp w10, w11, [sp, #48] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: add w8, w17, w17 +; NONEON-NOSVE-NEXT: strh w9, [sp, #64] +; NONEON-NOSVE-NEXT: add w9, w16, w16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #94] +; NONEON-NOSVE-NEXT: add w8, w15, w15 +; NONEON-NOSVE-NEXT: strh w9, [sp, #92] +; NONEON-NOSVE-NEXT: add w9, w14, w14 +; NONEON-NOSVE-NEXT: strh w8, [sp, #90] +; NONEON-NOSVE-NEXT: add w8, w13, w13 +; NONEON-NOSVE-NEXT: strh w9, [sp, #88] +; NONEON-NOSVE-NEXT: add w9, w12, w12 +; NONEON-NOSVE-NEXT: strh w8, [sp, #86] +; NONEON-NOSVE-NEXT: add w8, w11, w11 +; NONEON-NOSVE-NEXT: strh w9, [sp, #84] +; NONEON-NOSVE-NEXT: add w9, w10, w10 +; NONEON-NOSVE-NEXT: strh w8, [sp, #82] +; NONEON-NOSVE-NEXT: strh w9, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %a = load <16 x i32>, ptr %in %b = trunc <16 x i32> %a to <16 x i16> %c = add <16 x i16> %b, %b @@ -357,6 +1901,119 @@ define void @trunc_v32i32_v32i16(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: stp q0, q1, [x1, #32] ; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v32i32_v32i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #304 +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32] +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #224] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0] +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #240] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #256] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64] +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #272] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #96] +; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #80] +; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #112] +; NONEON-NOSVE-NEXT: stp q5, q7, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: ldp w27, w28, [sp, #112] +; NONEON-NOSVE-NEXT: ldp 
w25, w26, [sp, #104] +; NONEON-NOSVE-NEXT: add w6, w8, w8 +; NONEON-NOSVE-NEXT: add w5, w9, w9 +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #288] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w10, w8, [sp, #128] +; NONEON-NOSVE-NEXT: ldp w23, w24, [sp, #96] +; NONEON-NOSVE-NEXT: ldp w21, w22, [sp, #24] +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #8] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #120] +; NONEON-NOSVE-NEXT: stp q6, q0, [sp, #48] +; NONEON-NOSVE-NEXT: ldp w19, w20, [sp, #16] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #208] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: strh w8, [sp, #182] +; NONEON-NOSVE-NEXT: add w8, w28, w28 +; NONEON-NOSVE-NEXT: strh w9, [sp, #180] +; NONEON-NOSVE-NEXT: add w9, w27, w27 +; NONEON-NOSVE-NEXT: strh w8, [sp, #178] +; NONEON-NOSVE-NEXT: add w8, w26, w26 +; NONEON-NOSVE-NEXT: strh w9, [sp, #176] +; NONEON-NOSVE-NEXT: add w9, w25, w25 +; NONEON-NOSVE-NEXT: ldp w4, w7, [sp, #56] +; NONEON-NOSVE-NEXT: strh w8, [sp, #174] +; NONEON-NOSVE-NEXT: add w8, w24, w24 +; NONEON-NOSVE-NEXT: strh w9, [sp, #172] +; NONEON-NOSVE-NEXT: add w9, w23, w23 +; NONEON-NOSVE-NEXT: ldp w2, w3, [sp, #48] +; NONEON-NOSVE-NEXT: strh w8, [sp, #170] +; NONEON-NOSVE-NEXT: add w8, w22, w22 +; NONEON-NOSVE-NEXT: strh w9, [sp, #168] +; NONEON-NOSVE-NEXT: add w9, w21, w21 +; NONEON-NOSVE-NEXT: ldp w18, w0, [sp, #40] +; NONEON-NOSVE-NEXT: strh w8, [sp, #166] +; NONEON-NOSVE-NEXT: add w8, w20, w20 +; NONEON-NOSVE-NEXT: strh w9, [sp, #164] +; NONEON-NOSVE-NEXT: add w9, w19, w19 +; NONEON-NOSVE-NEXT: ldp w16, w17, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #162] +; NONEON-NOSVE-NEXT: add w8, w7, w7 +; NONEON-NOSVE-NEXT: strh w9, [sp, #160] +; NONEON-NOSVE-NEXT: add w9, w4, w4 +; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #72] +; NONEON-NOSVE-NEXT: strh w8, [sp, #158] +; NONEON-NOSVE-NEXT: add w8, w3, w3 +; NONEON-NOSVE-NEXT: strh w9, [sp, #156] +; NONEON-NOSVE-NEXT: add w9, w2, 
w2 +; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #64] +; NONEON-NOSVE-NEXT: strh w8, [sp, #154] +; NONEON-NOSVE-NEXT: add w8, w0, w0 +; NONEON-NOSVE-NEXT: strh w9, [sp, #152] +; NONEON-NOSVE-NEXT: add w9, w18, w18 +; NONEON-NOSVE-NEXT: ldp w10, w11, [sp, #136] +; NONEON-NOSVE-NEXT: strh w8, [sp, #150] +; NONEON-NOSVE-NEXT: add w8, w17, w17 +; NONEON-NOSVE-NEXT: strh w9, [sp, #148] +; NONEON-NOSVE-NEXT: add w9, w16, w16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #146] +; NONEON-NOSVE-NEXT: add w8, w15, w15 +; NONEON-NOSVE-NEXT: strh w9, [sp, #144] +; NONEON-NOSVE-NEXT: add w9, w14, w14 +; NONEON-NOSVE-NEXT: strh w8, [sp, #206] +; NONEON-NOSVE-NEXT: add w8, w13, w13 +; NONEON-NOSVE-NEXT: strh w9, [sp, #204] +; NONEON-NOSVE-NEXT: add w9, w12, w12 +; NONEON-NOSVE-NEXT: ldp w29, w30, [sp, #80] +; NONEON-NOSVE-NEXT: strh w8, [sp, #202] +; NONEON-NOSVE-NEXT: add w8, w11, w11 +; NONEON-NOSVE-NEXT: strh w9, [sp, #200] +; NONEON-NOSVE-NEXT: add w9, w10, w10 +; NONEON-NOSVE-NEXT: strh w8, [sp, #198] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w9, [sp, #196] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w5, [sp, #190] +; NONEON-NOSVE-NEXT: add w5, w30, w30 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w6, [sp, #188] +; NONEON-NOSVE-NEXT: add w6, w29, w29 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: strh w5, [sp, #186] +; NONEON-NOSVE-NEXT: ldp q1, q3, [sp, #144] +; NONEON-NOSVE-NEXT: strh w6, [sp, #184] +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #288] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #194] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #272] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w9, [sp, #192] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #256] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q2, q0, [sp, #176] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #240] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #224] // 
16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #208] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: stp q3, q2, [x1] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #304 +; NONEON-NOSVE-NEXT: ret %a = load <32 x i32>, ptr %in %b = trunc <32 x i32> %a to <32 x i16> %c = add <32 x i16> %b, %b @@ -414,6 +2071,280 @@ define void @trunc_v64i32_v64i16(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: stp q2, q3, [x1, #32] ; CHECK-NEXT: stp q4, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v64i32_v64i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: sub sp, sp, #528 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: mov x5, x1 +; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #192] +; NONEON-NOSVE-NEXT: ldp q23, q22, [x0, #224] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #96] +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #64] +; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #160] +; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #128] +; NONEON-NOSVE-NEXT: str q0, [sp, #320] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #332] +; NONEON-NOSVE-NEXT: stp q17, q23, [sp, #160] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #320] +; NONEON-NOSVE-NEXT: stp q22, q16, [sp, #192] +; NONEON-NOSVE-NEXT: ldr w23, [sp, #328] +; NONEON-NOSVE-NEXT: add w21, w8, w8 +; NONEON-NOSVE-NEXT: stp q18, q20, [sp, #240] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #160] +; NONEON-NOSVE-NEXT: stp q7, q21, [sp, #368] +; NONEON-NOSVE-NEXT: str q19, [sp, #224] +; NONEON-NOSVE-NEXT: ldr w29, [sp, #380] +; NONEON-NOSVE-NEXT: ldr w30, [sp, #376] 
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #168] +; NONEON-NOSVE-NEXT: stp q4, q6, [sp, #288] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #336] +; NONEON-NOSVE-NEXT: ldr w3, [sp, #300] +; NONEON-NOSVE-NEXT: ldr w4, [sp, #296] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w11, [sp, #360] +; NONEON-NOSVE-NEXT: ldr w12, [sp, #356] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #208] +; NONEON-NOSVE-NEXT: ldr w13, [sp, #352] +; NONEON-NOSVE-NEXT: ldr w14, [sp, #348] +; NONEON-NOSVE-NEXT: ldr w15, [sp, #344] +; NONEON-NOSVE-NEXT: str q3, [sp, #144] +; NONEON-NOSVE-NEXT: ldr w16, [sp, #340] +; NONEON-NOSVE-NEXT: ldr w17, [sp, #336] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w6, [sp, #292] +; NONEON-NOSVE-NEXT: ldr w7, [sp, #288] +; NONEON-NOSVE-NEXT: str q5, [sp, #272] +; NONEON-NOSVE-NEXT: ldr w25, [sp, #316] +; NONEON-NOSVE-NEXT: ldr w26, [sp, #312] +; NONEON-NOSVE-NEXT: ldr w19, [sp, #284] +; NONEON-NOSVE-NEXT: ldr w20, [sp, #280] +; NONEON-NOSVE-NEXT: ldr w22, [sp, #276] +; NONEON-NOSVE-NEXT: ldr w24, [sp, #272] +; NONEON-NOSVE-NEXT: ldr w27, [sp, #308] +; NONEON-NOSVE-NEXT: ldr w28, [sp, #304] +; NONEON-NOSVE-NEXT: strh w21, [sp, #494] +; NONEON-NOSVE-NEXT: add w21, w23, w23 +; NONEON-NOSVE-NEXT: strh w21, [sp, #492] +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #216] +; NONEON-NOSVE-NEXT: ldp w0, w18, [sp, #152] +; NONEON-NOSVE-NEXT: ldp w2, w1, [sp, #144] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #176] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #184] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #192] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #200] +; NONEON-NOSVE-NEXT: stp 
w8, w9, [sp, #80] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w9, [sp, #384] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #388] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w9, [sp, #392] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #396] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w9, [sp, #256] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #260] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w9, [sp, #264] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #268] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #224] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #232] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #240] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #248] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w9, [sp, #368] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #372] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w8, [sp, #324] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #364] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #490] +; NONEON-NOSVE-NEXT: add w8, w10, w10 +; NONEON-NOSVE-NEXT: strh w8, [sp, #488] +; NONEON-NOSVE-NEXT: add w8, w9, w9 +; NONEON-NOSVE-NEXT: strh w8, [sp, #486] +; NONEON-NOSVE-NEXT: add w8, w11, w11 +; NONEON-NOSVE-NEXT: strh w8, [sp, #484] +; NONEON-NOSVE-NEXT: add w8, w12, w12 +; NONEON-NOSVE-NEXT: strh w8, [sp, #482] +; NONEON-NOSVE-NEXT: add w8, w13, w13 +; NONEON-NOSVE-NEXT: strh w8, [sp, #480] +; NONEON-NOSVE-NEXT: add w8, w14, w14 +; NONEON-NOSVE-NEXT: strh w8, [sp, #478] +; NONEON-NOSVE-NEXT: add w8, w15, w15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #476] +; NONEON-NOSVE-NEXT: add w8, w16, w16 
+; NONEON-NOSVE-NEXT: strh w8, [sp, #474] +; NONEON-NOSVE-NEXT: add w8, w17, w17 +; NONEON-NOSVE-NEXT: strh w8, [sp, #472] +; NONEON-NOSVE-NEXT: add w8, w18, w18 +; NONEON-NOSVE-NEXT: strh w8, [sp, #470] +; NONEON-NOSVE-NEXT: add w8, w0, w0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #468] +; NONEON-NOSVE-NEXT: add w8, w1, w1 +; NONEON-NOSVE-NEXT: strh w8, [sp, #466] +; NONEON-NOSVE-NEXT: add w8, w2, w2 +; NONEON-NOSVE-NEXT: strh w8, [sp, #464] +; NONEON-NOSVE-NEXT: add w8, w3, w3 +; NONEON-NOSVE-NEXT: strh w8, [sp, #462] +; NONEON-NOSVE-NEXT: add w8, w4, w4 +; NONEON-NOSVE-NEXT: strh w8, [sp, #460] +; NONEON-NOSVE-NEXT: add w8, w6, w6 +; NONEON-NOSVE-NEXT: strh w8, [sp, #458] +; NONEON-NOSVE-NEXT: add w8, w7, w7 +; NONEON-NOSVE-NEXT: strh w8, [sp, #456] +; NONEON-NOSVE-NEXT: add w8, w19, w19 +; NONEON-NOSVE-NEXT: strh w8, [sp, #454] +; NONEON-NOSVE-NEXT: add w8, w20, w20 +; NONEON-NOSVE-NEXT: strh w8, [sp, #452] +; NONEON-NOSVE-NEXT: add w8, w22, w22 +; NONEON-NOSVE-NEXT: strh w8, [sp, #450] +; NONEON-NOSVE-NEXT: add w8, w24, w24 +; NONEON-NOSVE-NEXT: strh w8, [sp, #448] +; NONEON-NOSVE-NEXT: add w8, w25, w25 +; NONEON-NOSVE-NEXT: strh w8, [sp, #510] +; NONEON-NOSVE-NEXT: add w8, w26, w26 +; NONEON-NOSVE-NEXT: strh w8, [sp, #508] +; NONEON-NOSVE-NEXT: add w8, w27, w27 +; NONEON-NOSVE-NEXT: strh w8, [sp, #506] +; NONEON-NOSVE-NEXT: add w8, w28, w28 +; NONEON-NOSVE-NEXT: strh w8, [sp, #504] +; NONEON-NOSVE-NEXT: add w8, w29, w29 +; NONEON-NOSVE-NEXT: strh w8, [sp, #502] +; NONEON-NOSVE-NEXT: add w8, w30, w30 +; NONEON-NOSVE-NEXT: strh w8, [sp, #500] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #464] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #498] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #496] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 
+; NONEON-NOSVE-NEXT: strh w8, [sp, #446] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #444] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #442] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #440] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #438] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #436] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #434] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #432] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q6, q3, [sp, #432] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #526] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #524] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #522] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #520] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #518] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #516] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] // 4-byte 
Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #514] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #512] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q4, q7, [sp, #496] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #414] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #412] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #410] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #408] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #406] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #100] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #404] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #402] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #400] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #430] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #116] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #428] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #426] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #124] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, 
#424] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #422] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #420] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #418] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w8, [sp, #416] +; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #400] +; NONEON-NOSVE-NEXT: stp q1, q0, [x5] +; NONEON-NOSVE-NEXT: stp q4, q3, [x5, #32] +; NONEON-NOSVE-NEXT: stp q7, q6, [x5, #64] +; NONEON-NOSVE-NEXT: stp q2, q5, [x5, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #528 +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ret %a = load <64 x i32>, ptr %in %b = trunc <64 x i32> %a to <64 x i16> %c = add <64 x i16> %b, %b @@ -437,6 +2368,20 @@ define <4 x i8> @trunc_v4i64_v4i8(ptr %in) nounwind { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v4i64_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldp x8, x10, [sp] +; NONEON-NOSVE-NEXT: strh w9, [sp, #46] +; NONEON-NOSVE-NEXT: strh w10, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %a = load <4 x i64>, ptr %in %b = trunc <4 x i64> %a to <4 x i8> ret <4 x i8> %b @@ -461,6 +2406,31 @@ define <8 x i8> @trunc_v8i64_v8i8(ptr %in) nounwind { ; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v8i64_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: str q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #16] +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: str q2, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldp x8, x10, [sp, #48] +; NONEON-NOSVE-NEXT: strb w9, [sp, #79] +; NONEON-NOSVE-NEXT: strb w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldp x8, x11, [sp, #32] +; NONEON-NOSVE-NEXT: strb w10, [sp, #77] +; NONEON-NOSVE-NEXT: strb w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: strb w11, [sp, #75] +; NONEON-NOSVE-NEXT: strb w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #72] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %a = load <8 x i64>, ptr %in %b = trunc <8 x i64> %a to <8 x i8> ret <8 x i8> %b @@ -499,6 +2469,51 @@ define <16 x i8> @trunc_v16i64_v16i8(ptr %in) nounwind { ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v16i64_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #144 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #96] 
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: ldp q4, q5, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q6, q7, [x0, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: str q3, [sp, #80] +; NONEON-NOSVE-NEXT: str q2, [sp] +; NONEON-NOSVE-NEXT: stp q7, q5, [sp, #48] +; NONEON-NOSVE-NEXT: strb w8, [sp, #142] +; NONEON-NOSVE-NEXT: ldp x8, x10, [sp, #16] +; NONEON-NOSVE-NEXT: stp q4, q6, [sp, #96] +; NONEON-NOSVE-NEXT: strb w9, [sp, #143] +; NONEON-NOSVE-NEXT: strb w8, [sp, #140] +; NONEON-NOSVE-NEXT: ldp x8, x11, [sp, #48] +; NONEON-NOSVE-NEXT: strb w10, [sp, #141] +; NONEON-NOSVE-NEXT: strb w8, [sp, #138] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #120] +; NONEON-NOSVE-NEXT: strb w11, [sp, #139] +; NONEON-NOSVE-NEXT: strb w8, [sp, #137] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #112] +; NONEON-NOSVE-NEXT: strb w8, [sp, #136] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #72] +; NONEON-NOSVE-NEXT: strb w8, [sp, #135] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #64] +; NONEON-NOSVE-NEXT: strb w8, [sp, #134] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #104] +; NONEON-NOSVE-NEXT: strb w8, [sp, #133] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #96] +; NONEON-NOSVE-NEXT: strb w8, [sp, #132] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #88] +; NONEON-NOSVE-NEXT: strb w8, [sp, #131] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #80] +; NONEON-NOSVE-NEXT: strb w8, [sp, #130] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: strb w8, [sp, #129] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: strb w8, [sp, #128] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #128] +; NONEON-NOSVE-NEXT: add sp, sp, #144 +; NONEON-NOSVE-NEXT: ret %a = load <16 x i64>, ptr %in %b = trunc <16 x i64> %a to <16 x i8> ret <16 x i8> %b @@ -565,6 +2580,143 @@ define void @trunc_v32i64_v32i8(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: add z0.b, z0.b, z0.b ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v32i64_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub 
sp, sp, #416 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #96] +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #336] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #64] +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #352] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #368] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #32] +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #384] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #128] +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #400] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0] +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #320] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #224] +; NONEON-NOSVE-NEXT: str x1, [sp, #24] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #192] +; NONEON-NOSVE-NEXT: ldp q23, q22, [x0, #160] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #160] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #176] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #184] +; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #192] +; NONEON-NOSVE-NEXT: stp q21, q19, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w25, [sp, #208] +; NONEON-NOSVE-NEXT: ldr w26, [sp, #216] +; NONEON-NOSVE-NEXT: add w5, w9, w9 +; NONEON-NOSVE-NEXT: add w6, w8, w8 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #192] +; NONEON-NOSVE-NEXT: stp q20, q23, [sp, #96] +; NONEON-NOSVE-NEXT: ldr w2, [sp, #64] +; NONEON-NOSVE-NEXT: ldr w16, [sp, #48] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: ldr w18, [sp, #96] +; NONEON-NOSVE-NEXT: stp q22, q16, [sp, #128] +; NONEON-NOSVE-NEXT: stp q6, q5, [sp, #224] +; NONEON-NOSVE-NEXT: ldr w3, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w14, [sp, #128] +; NONEON-NOSVE-NEXT: stp q3, q17, [sp, #256] +; NONEON-NOSVE-NEXT: ldr w23, [sp, #240] +; NONEON-NOSVE-NEXT: ldr w21, [sp, #224] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #272] +; NONEON-NOSVE-NEXT: ldr w27, [sp, #256] +; NONEON-NOSVE-NEXT: ldr w28, [sp, #264] +; NONEON-NOSVE-NEXT: strb w9, [sp, #298] +; NONEON-NOSVE-NEXT: ldr w24, [sp, #248] +; 
NONEON-NOSVE-NEXT: ldr w22, [sp, #232] +; NONEON-NOSVE-NEXT: add w9, w27, w27 +; NONEON-NOSVE-NEXT: str w8, [sp, #20] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w8, [sp, #200] +; NONEON-NOSVE-NEXT: str q7, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w0, [sp, #104] +; NONEON-NOSVE-NEXT: ldr w12, [sp, #112] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strb w9, [sp, #296] +; NONEON-NOSVE-NEXT: add w9, w25, w25 +; NONEON-NOSVE-NEXT: str q18, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w19, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w20, [sp, #40] +; NONEON-NOSVE-NEXT: strb w8, [sp, #299] +; NONEON-NOSVE-NEXT: add w8, w28, w28 +; NONEON-NOSVE-NEXT: ldr w4, [sp, #80] +; NONEON-NOSVE-NEXT: strb w9, [sp, #294] +; NONEON-NOSVE-NEXT: add w9, w23, w23 +; NONEON-NOSVE-NEXT: ldr w7, [sp, #88] +; NONEON-NOSVE-NEXT: strb w8, [sp, #297] +; NONEON-NOSVE-NEXT: add w8, w26, w26 +; NONEON-NOSVE-NEXT: ldr w17, [sp, #56] +; NONEON-NOSVE-NEXT: strb w9, [sp, #292] +; NONEON-NOSVE-NEXT: add w9, w21, w21 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #144] +; NONEON-NOSVE-NEXT: strb w8, [sp, #295] +; NONEON-NOSVE-NEXT: add w8, w24, w24 +; NONEON-NOSVE-NEXT: ldr w15, [sp, #136] +; NONEON-NOSVE-NEXT: strb w9, [sp, #290] +; NONEON-NOSVE-NEXT: add w9, w19, w19 +; NONEON-NOSVE-NEXT: ldr w13, [sp, #120] +; NONEON-NOSVE-NEXT: strb w8, [sp, #293] +; NONEON-NOSVE-NEXT: add w8, w22, w22 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #152] +; NONEON-NOSVE-NEXT: strb w9, [sp, #288] +; NONEON-NOSVE-NEXT: add w9, w4, w4 +; NONEON-NOSVE-NEXT: ldr w1, [sp, #280] +; NONEON-NOSVE-NEXT: strb w8, [sp, #291] +; NONEON-NOSVE-NEXT: add w8, w20, w20 +; NONEON-NOSVE-NEXT: ldr w29, [sp, #160] +; NONEON-NOSVE-NEXT: strb w9, [sp, #318] +; NONEON-NOSVE-NEXT: add w9, w2, w2 +; NONEON-NOSVE-NEXT: ldr w30, [sp, #168] +; NONEON-NOSVE-NEXT: strb w8, [sp, #289] +; NONEON-NOSVE-NEXT: add w8, w7, w7 +; NONEON-NOSVE-NEXT: strb w9, [sp, #316] +; NONEON-NOSVE-NEXT: add w9, w18, w18 +; NONEON-NOSVE-NEXT: strb w8, [sp, #319] +; NONEON-NOSVE-NEXT: add w8, 
w3, w3 +; NONEON-NOSVE-NEXT: strb w9, [sp, #314] +; NONEON-NOSVE-NEXT: add w9, w16, w16 +; NONEON-NOSVE-NEXT: strb w8, [sp, #317] +; NONEON-NOSVE-NEXT: add w8, w0, w0 +; NONEON-NOSVE-NEXT: strb w9, [sp, #312] +; NONEON-NOSVE-NEXT: add w9, w14, w14 +; NONEON-NOSVE-NEXT: strb w8, [sp, #315] +; NONEON-NOSVE-NEXT: add w8, w17, w17 +; NONEON-NOSVE-NEXT: strb w9, [sp, #310] +; NONEON-NOSVE-NEXT: add w9, w12, w12 +; NONEON-NOSVE-NEXT: strb w8, [sp, #313] +; NONEON-NOSVE-NEXT: add w8, w15, w15 +; NONEON-NOSVE-NEXT: strb w9, [sp, #308] +; NONEON-NOSVE-NEXT: add w9, w10, w10 +; NONEON-NOSVE-NEXT: strb w8, [sp, #311] +; NONEON-NOSVE-NEXT: add w8, w13, w13 +; NONEON-NOSVE-NEXT: strb w9, [sp, #306] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #20] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #309] +; NONEON-NOSVE-NEXT: add w8, w11, w11 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: strb w5, [sp, #303] +; NONEON-NOSVE-NEXT: add w5, w30, w30 +; NONEON-NOSVE-NEXT: strb w6, [sp, #302] +; NONEON-NOSVE-NEXT: add w6, w29, w29 +; NONEON-NOSVE-NEXT: strb w8, [sp, #307] +; NONEON-NOSVE-NEXT: add w8, w1, w1 +; NONEON-NOSVE-NEXT: strb w5, [sp, #301] +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #400] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w6, [sp, #300] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #384] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w8, [sp, #305] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: strb w9, [sp, #304] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #368] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #288] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #352] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #336] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: stp q1, q0, [x8] +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #320] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: add sp, sp, #416 +; NONEON-NOSVE-NEXT: ret %a = load <32 x i64>, ptr %in %b = trunc <32 x i64> %a to <32 x i8> 
%c = add <32 x i8> %b, %b @@ -587,6 +2739,20 @@ define <4 x i16> @trunc_v4i64_v4i16(ptr %in) nounwind { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v4i64_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldp x8, x10, [sp] +; NONEON-NOSVE-NEXT: strh w9, [sp, #46] +; NONEON-NOSVE-NEXT: strh w10, [sp, #42] +; NONEON-NOSVE-NEXT: strh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %a = load <4 x i64>, ptr %in %b = trunc <4 x i64> %a to <4 x i16> ret <4 x i16> %b @@ -610,6 +2776,31 @@ define <8 x i16> @trunc_v8i64_v8i16(ptr %in) nounwind { ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v8i64_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: str q1, [sp, #48] +; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #16] +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: str q2, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldp x8, x10, [sp, #48] +; NONEON-NOSVE-NEXT: strh w9, [sp, #78] +; NONEON-NOSVE-NEXT: strh w8, [sp, #72] +; NONEON-NOSVE-NEXT: ldp x8, x11, [sp, #32] +; NONEON-NOSVE-NEXT: strh w10, [sp, #74] +; NONEON-NOSVE-NEXT: strh w8, [sp, #68] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #8] +; NONEON-NOSVE-NEXT: strh w11, [sp, #70] +; NONEON-NOSVE-NEXT: strh w8, [sp, #66] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #64] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %a = load <8 x i64>, ptr %in %b = trunc <8 x i64> 
%a to <8 x i16> ret <8 x i16> %b @@ -647,6 +2838,70 @@ define void @trunc_v16i64_v16i16(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: add z1.h, z3.h, z3.h ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v16i64_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #160 +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0] +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64] +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #96] +; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #64] +; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #96] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w2, [sp, #96] +; NONEON-NOSVE-NEXT: ldr w3, [sp, #104] +; NONEON-NOSVE-NEXT: stp q5, q7, [sp] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: ldr w4, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w5, [sp, #88] +; NONEON-NOSVE-NEXT: stp q6, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w18, [sp] +; NONEON-NOSVE-NEXT: ldr w0, [sp, #8] +; NONEON-NOSVE-NEXT: strh w9, [sp, #142] +; NONEON-NOSVE-NEXT: add w9, w3, w3 +; NONEON-NOSVE-NEXT: strh w8, [sp, #140] +; NONEON-NOSVE-NEXT: add w8, w2, w2 +; NONEON-NOSVE-NEXT: ldr w16, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w17, [sp, #40] +; NONEON-NOSVE-NEXT: strh w9, [sp, #138] +; NONEON-NOSVE-NEXT: add w9, w5, w5 +; NONEON-NOSVE-NEXT: strh w8, [sp, #136] +; NONEON-NOSVE-NEXT: add w8, w4, w4 +; NONEON-NOSVE-NEXT: ldr w14, [sp, #16] +; NONEON-NOSVE-NEXT: ldr w15, [sp, #24] +; NONEON-NOSVE-NEXT: strh w9, [sp, #134] +; NONEON-NOSVE-NEXT: add w9, w0, w0 +; NONEON-NOSVE-NEXT: strh w8, [sp, #132] +; NONEON-NOSVE-NEXT: add w8, w18, w18 +; NONEON-NOSVE-NEXT: ldr w12, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w13, [sp, #56] +; NONEON-NOSVE-NEXT: strh w9, [sp, #130] +; NONEON-NOSVE-NEXT: add w9, w17, w17 +; NONEON-NOSVE-NEXT: strh w8, [sp, #128] +; NONEON-NOSVE-NEXT: add w8, w16, w16 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #112] +; NONEON-NOSVE-NEXT: ldr 
w11, [sp, #120] +; NONEON-NOSVE-NEXT: strh w9, [sp, #158] +; NONEON-NOSVE-NEXT: add w9, w15, w15 +; NONEON-NOSVE-NEXT: strh w8, [sp, #156] +; NONEON-NOSVE-NEXT: add w8, w14, w14 +; NONEON-NOSVE-NEXT: strh w9, [sp, #154] +; NONEON-NOSVE-NEXT: add w9, w13, w13 +; NONEON-NOSVE-NEXT: strh w8, [sp, #152] +; NONEON-NOSVE-NEXT: add w8, w12, w12 +; NONEON-NOSVE-NEXT: strh w9, [sp, #150] +; NONEON-NOSVE-NEXT: add w9, w11, w11 +; NONEON-NOSVE-NEXT: strh w8, [sp, #148] +; NONEON-NOSVE-NEXT: add w8, w10, w10 +; NONEON-NOSVE-NEXT: strh w9, [sp, #146] +; NONEON-NOSVE-NEXT: strh w8, [sp, #144] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #128] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #160 +; NONEON-NOSVE-NEXT: ret %a = load <16 x i64>, ptr %in %b = trunc <16 x i64> %a to <16 x i16> %c = add <16 x i16> %b, %b @@ -711,6 +2966,144 @@ define void @trunc_v32i64_v32i16(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: stp q1, q2, [x1, #32] ; CHECK-NEXT: stp q3, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v32i64_v32i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #432 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #96] +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #352] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #64] +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #368] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #384] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #128] +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #400] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #32] +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #416] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0] +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #336] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #224] +; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #192] +; NONEON-NOSVE-NEXT: ldp q23, q22, [x0, #160] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #144] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #160] +; 
NONEON-NOSVE-NEXT: ldr w9, [sp, #168] +; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #176] +; NONEON-NOSVE-NEXT: stp q21, q19, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w25, [sp, #192] +; NONEON-NOSVE-NEXT: ldr w26, [sp, #200] +; NONEON-NOSVE-NEXT: add w6, w8, w8 +; NONEON-NOSVE-NEXT: add w5, w9, w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #176] +; NONEON-NOSVE-NEXT: stp q20, q23, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w2, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w3, [sp, #56] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: ldr w18, [sp, #80] +; NONEON-NOSVE-NEXT: stp q22, q16, [sp, #112] +; NONEON-NOSVE-NEXT: stp q6, q5, [sp, #208] +; NONEON-NOSVE-NEXT: ldr w0, [sp, #88] +; NONEON-NOSVE-NEXT: ldr w16, [sp, #32] +; NONEON-NOSVE-NEXT: stp q3, q17, [sp, #240] +; NONEON-NOSVE-NEXT: ldr w23, [sp, #224] +; NONEON-NOSVE-NEXT: ldr w24, [sp, #232] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #256] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #264] +; NONEON-NOSVE-NEXT: ldr w27, [sp, #240] +; NONEON-NOSVE-NEXT: ldr w28, [sp, #248] +; NONEON-NOSVE-NEXT: strh w9, [sp, #308] +; NONEON-NOSVE-NEXT: ldr w21, [sp, #208] +; NONEON-NOSVE-NEXT: add w9, w27, w27 +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #8] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w8, [sp, #184] +; NONEON-NOSVE-NEXT: str q7, [sp, #16] +; NONEON-NOSVE-NEXT: ldr w22, [sp, #216] +; NONEON-NOSVE-NEXT: ldr w17, [sp, #40] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w9, [sp, #304] +; NONEON-NOSVE-NEXT: add w9, w25, w25 +; NONEON-NOSVE-NEXT: strh w8, [sp, #310] +; NONEON-NOSVE-NEXT: add w8, w28, w28 +; NONEON-NOSVE-NEXT: ldr w19, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #306] +; NONEON-NOSVE-NEXT: add w8, w26, w26 +; NONEON-NOSVE-NEXT: ldr w20, [sp, #24] +; NONEON-NOSVE-NEXT: str q18, [sp, #64] +; NONEON-NOSVE-NEXT: ldr w14, [sp, #112] +; NONEON-NOSVE-NEXT: ldr w15, [sp, #120] +; NONEON-NOSVE-NEXT: strh w8, [sp, #302] +; NONEON-NOSVE-NEXT: add w8, w24, w24 +; NONEON-NOSVE-NEXT: ldr w4, [sp, #64] +; NONEON-NOSVE-NEXT: strh w9, [sp, 
#300] +; NONEON-NOSVE-NEXT: add w9, w23, w23 +; NONEON-NOSVE-NEXT: ldr w7, [sp, #72] +; NONEON-NOSVE-NEXT: strh w8, [sp, #298] +; NONEON-NOSVE-NEXT: add w8, w22, w22 +; NONEON-NOSVE-NEXT: ldr w12, [sp, #96] +; NONEON-NOSVE-NEXT: strh w9, [sp, #296] +; NONEON-NOSVE-NEXT: add w9, w21, w21 +; NONEON-NOSVE-NEXT: ldr w13, [sp, #104] +; NONEON-NOSVE-NEXT: strh w8, [sp, #294] +; NONEON-NOSVE-NEXT: add w8, w20, w20 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #128] +; NONEON-NOSVE-NEXT: strh w9, [sp, #292] +; NONEON-NOSVE-NEXT: add w9, w19, w19 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #136] +; NONEON-NOSVE-NEXT: strh w8, [sp, #290] +; NONEON-NOSVE-NEXT: add w8, w7, w7 +; NONEON-NOSVE-NEXT: ldr w29, [sp, #144] +; NONEON-NOSVE-NEXT: strh w9, [sp, #288] +; NONEON-NOSVE-NEXT: add w9, w4, w4 +; NONEON-NOSVE-NEXT: ldr w30, [sp, #152] +; NONEON-NOSVE-NEXT: strh w8, [sp, #286] +; NONEON-NOSVE-NEXT: add w8, w3, w3 +; NONEON-NOSVE-NEXT: strh w9, [sp, #284] +; NONEON-NOSVE-NEXT: add w9, w2, w2 +; NONEON-NOSVE-NEXT: strh w8, [sp, #282] +; NONEON-NOSVE-NEXT: add w8, w0, w0 +; NONEON-NOSVE-NEXT: strh w9, [sp, #280] +; NONEON-NOSVE-NEXT: add w9, w18, w18 +; NONEON-NOSVE-NEXT: strh w8, [sp, #278] +; NONEON-NOSVE-NEXT: add w8, w17, w17 +; NONEON-NOSVE-NEXT: strh w9, [sp, #276] +; NONEON-NOSVE-NEXT: add w9, w16, w16 +; NONEON-NOSVE-NEXT: strh w8, [sp, #274] +; NONEON-NOSVE-NEXT: add w8, w15, w15 +; NONEON-NOSVE-NEXT: strh w9, [sp, #272] +; NONEON-NOSVE-NEXT: add w9, w14, w14 +; NONEON-NOSVE-NEXT: strh w8, [sp, #334] +; NONEON-NOSVE-NEXT: add w8, w13, w13 +; NONEON-NOSVE-NEXT: strh w9, [sp, #332] +; NONEON-NOSVE-NEXT: add w9, w12, w12 +; NONEON-NOSVE-NEXT: strh w8, [sp, #330] +; NONEON-NOSVE-NEXT: add w8, w11, w11 +; NONEON-NOSVE-NEXT: strh w9, [sp, #328] +; NONEON-NOSVE-NEXT: add w9, w10, w10 +; NONEON-NOSVE-NEXT: strh w8, [sp, #326] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w9, [sp, #324] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload +; 
NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: strh w5, [sp, #318] +; NONEON-NOSVE-NEXT: add w5, w30, w30 +; NONEON-NOSVE-NEXT: strh w6, [sp, #316] +; NONEON-NOSVE-NEXT: add w6, w29, w29 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: strh w5, [sp, #314] +; NONEON-NOSVE-NEXT: ldp q1, q3, [sp, #272] +; NONEON-NOSVE-NEXT: strh w6, [sp, #312] +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #416] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w8, [sp, #322] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #400] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: strh w9, [sp, #320] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #384] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q2, q0, [sp, #304] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #368] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #352] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: stp q3, q2, [x1] +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #336] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: stp q0, q1, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #432 +; NONEON-NOSVE-NEXT: ret %a = load <32 x i64>, ptr %in %b = trunc <32 x i64> %a to <32 x i16> %c = add <32 x i16> %b, %b @@ -732,6 +3125,18 @@ define <4 x i32> @trunc_v4i64_v4i32(ptr %in) nounwind { ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v4i64_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldp x8, x10, [sp] +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %a = load <4 x i64>, ptr %in %b = trunc <4 x i64> %a to <4 x i32> ret <4 x i32> %b @@ -754,6 +3159,38 @@ define void @trunc_v8i64_v8i32(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: add z1.s, z2.s, z2.s ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v8i64_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #96 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: stp q3, q1, [sp] +; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w12, [sp] +; NONEON-NOSVE-NEXT: ldr w13, [sp, #8] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #40] +; NONEON-NOSVE-NEXT: ldr w14, [sp, #16] +; NONEON-NOSVE-NEXT: ldr w15, [sp, #24] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w11, [sp, #56] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] +; NONEON-NOSVE-NEXT: add w9, w13, w13 +; NONEON-NOSVE-NEXT: add w8, w12, w12 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] +; NONEON-NOSVE-NEXT: add w9, w15, w15 +; NONEON-NOSVE-NEXT: add w8, w14, w14 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] +; NONEON-NOSVE-NEXT: add w9, w11, w11 +; NONEON-NOSVE-NEXT: add w8, w10, w10 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %a = load <8 x i64>, ptr %in %b = trunc <8 x i64> %a to <8 x i32> %c = add <8 x i32> %b, %b @@ -789,6 +3226,64 @@ define void @trunc_v16i64_v16i32(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: stp q0, q1, [x1, #32] ; CHECK-NEXT: stp q2, q3, [x1] ; 
CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v16i64_v16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #192 +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0] +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64] +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #96] +; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #64] +; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #96] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #72] +; NONEON-NOSVE-NEXT: ldr w2, [sp, #96] +; NONEON-NOSVE-NEXT: ldr w3, [sp, #104] +; NONEON-NOSVE-NEXT: stp q5, q7, [sp] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: ldr w4, [sp, #80] +; NONEON-NOSVE-NEXT: ldr w5, [sp, #88] +; NONEON-NOSVE-NEXT: stp q6, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w18, [sp] +; NONEON-NOSVE-NEXT: ldr w0, [sp, #8] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #168] +; NONEON-NOSVE-NEXT: add w9, w3, w3 +; NONEON-NOSVE-NEXT: add w8, w2, w2 +; NONEON-NOSVE-NEXT: ldr w16, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w17, [sp, #40] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160] +; NONEON-NOSVE-NEXT: add w9, w5, w5 +; NONEON-NOSVE-NEXT: add w8, w4, w4 +; NONEON-NOSVE-NEXT: ldr w14, [sp, #16] +; NONEON-NOSVE-NEXT: ldr w15, [sp, #24] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] +; NONEON-NOSVE-NEXT: add w9, w0, w0 +; NONEON-NOSVE-NEXT: add w8, w18, w18 +; NONEON-NOSVE-NEXT: ldr w12, [sp, #48] +; NONEON-NOSVE-NEXT: ldr w13, [sp, #56] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] +; NONEON-NOSVE-NEXT: add w9, w17, w17 +; NONEON-NOSVE-NEXT: add w8, w16, w16 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #112] +; NONEON-NOSVE-NEXT: ldr w11, [sp, #120] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] +; NONEON-NOSVE-NEXT: add w9, w15, w15 +; NONEON-NOSVE-NEXT: add w8, w14, w14 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] +; NONEON-NOSVE-NEXT: add w9, w13, w13 +; NONEON-NOSVE-NEXT: add w8, w12, w12 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #184] +; NONEON-NOSVE-NEXT: add w9, w11, 
w11 +; NONEON-NOSVE-NEXT: add w8, w10, w10 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #176] +; NONEON-NOSVE-NEXT: ldp q1, q3, [sp, #128] +; NONEON-NOSVE-NEXT: ldp q2, q0, [sp, #160] +; NONEON-NOSVE-NEXT: stp q3, q2, [x1] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #192 +; NONEON-NOSVE-NEXT: ret %a = load <16 x i64>, ptr %in %b = trunc <16 x i64> %a to <16 x i32> %c = add <16 x i32> %b, %b @@ -846,6 +3341,149 @@ define void @trunc_v32i64_v32i32(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: stp q2, q3, [x1, #32] ; CHECK-NEXT: stp q4, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v32i64_v32i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #496 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #416] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #192] +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #432] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #448] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q23, q22, [x0, #224] +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #464] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #480] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #96] +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #400] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #64] +; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #160] +; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #128] +; NONEON-NOSVE-NEXT: str q0, [sp, #192] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #192] +; NONEON-NOSVE-NEXT: stp q17, q23, [sp, #32] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #200] +; NONEON-NOSVE-NEXT: ldr w10, [sp, #32] +; NONEON-NOSVE-NEXT: stp q4, q6, [sp, #160] +; NONEON-NOSVE-NEXT: ldr w12, [sp, #48] +; NONEON-NOSVE-NEXT: add w6, w8, w8 +; NONEON-NOSVE-NEXT: add w5, w9, w9 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] +; NONEON-NOSVE-NEXT: stp q18, q20, [sp, #112] +; NONEON-NOSVE-NEXT: ldr w25, [sp, #160] +; 
NONEON-NOSVE-NEXT: ldr w26, [sp, #168] +; NONEON-NOSVE-NEXT: str q5, [sp, #144] +; NONEON-NOSVE-NEXT: ldr w21, [sp, #176] +; NONEON-NOSVE-NEXT: ldr w22, [sp, #184] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #208] +; NONEON-NOSVE-NEXT: ldr w23, [sp, #144] +; NONEON-NOSVE-NEXT: ldr w24, [sp, #152] +; NONEON-NOSVE-NEXT: str q3, [sp, #16] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #208] +; NONEON-NOSVE-NEXT: ldr w4, [sp, #112] +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #8] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w8, [sp, #216] +; NONEON-NOSVE-NEXT: ldr w27, [sp, #16] +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: ldr w28, [sp, #24] +; NONEON-NOSVE-NEXT: stp q22, q16, [sp, #64] +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w9, [sp, #344] +; NONEON-NOSVE-NEXT: add w9, w27, w27 +; NONEON-NOSVE-NEXT: str w8, [sp, #348] +; NONEON-NOSVE-NEXT: add w8, w28, w28 +; NONEON-NOSVE-NEXT: ldr w7, [sp, #120] +; NONEON-NOSVE-NEXT: stp q7, q21, [sp, #240] +; NONEON-NOSVE-NEXT: ldr w18, [sp, #128] +; NONEON-NOSVE-NEXT: ldr w0, [sp, #136] +; NONEON-NOSVE-NEXT: str w8, [sp, #340] +; NONEON-NOSVE-NEXT: add w8, w26, w26 +; NONEON-NOSVE-NEXT: ldr w19, [sp, #240] +; NONEON-NOSVE-NEXT: str w9, [sp, #336] +; NONEON-NOSVE-NEXT: add w9, w25, w25 +; NONEON-NOSVE-NEXT: ldr w20, [sp, #248] +; NONEON-NOSVE-NEXT: str w8, [sp, #332] +; NONEON-NOSVE-NEXT: add w8, w24, w24 +; NONEON-NOSVE-NEXT: ldr w16, [sp, #256] +; NONEON-NOSVE-NEXT: str w9, [sp, #328] +; NONEON-NOSVE-NEXT: add w9, w23, w23 +; NONEON-NOSVE-NEXT: ldr w17, [sp, #264] +; NONEON-NOSVE-NEXT: str q19, [sp, #96] +; NONEON-NOSVE-NEXT: ldr w14, [sp, #64] +; NONEON-NOSVE-NEXT: ldr w15, [sp, #72] +; NONEON-NOSVE-NEXT: str w8, [sp, #324] +; NONEON-NOSVE-NEXT: add w8, w22, w22 +; NONEON-NOSVE-NEXT: ldr w2, [sp, #96] +; NONEON-NOSVE-NEXT: str w9, [sp, #320] +; NONEON-NOSVE-NEXT: add w9, w21, w21 +; NONEON-NOSVE-NEXT: ldr w3, [sp, #104] +; NONEON-NOSVE-NEXT: str w8, [sp, #380] +; NONEON-NOSVE-NEXT: add w8, w20, w20 +; 
NONEON-NOSVE-NEXT: ldr w13, [sp, #56] +; NONEON-NOSVE-NEXT: str w9, [sp, #376] +; NONEON-NOSVE-NEXT: add w9, w19, w19 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #80] +; NONEON-NOSVE-NEXT: str w8, [sp, #372] +; NONEON-NOSVE-NEXT: add w8, w7, w7 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #88] +; NONEON-NOSVE-NEXT: str w9, [sp, #368] +; NONEON-NOSVE-NEXT: add w9, w4, w4 +; NONEON-NOSVE-NEXT: ldr w29, [sp, #224] +; NONEON-NOSVE-NEXT: str w8, [sp, #316] +; NONEON-NOSVE-NEXT: add w8, w3, w3 +; NONEON-NOSVE-NEXT: ldr w30, [sp, #232] +; NONEON-NOSVE-NEXT: str w9, [sp, #312] +; NONEON-NOSVE-NEXT: add w9, w2, w2 +; NONEON-NOSVE-NEXT: str w8, [sp, #308] +; NONEON-NOSVE-NEXT: add w8, w0, w0 +; NONEON-NOSVE-NEXT: str w9, [sp, #304] +; NONEON-NOSVE-NEXT: add w9, w18, w18 +; NONEON-NOSVE-NEXT: str w8, [sp, #396] +; NONEON-NOSVE-NEXT: add w8, w17, w17 +; NONEON-NOSVE-NEXT: str w9, [sp, #392] +; NONEON-NOSVE-NEXT: add w9, w16, w16 +; NONEON-NOSVE-NEXT: str w8, [sp, #388] +; NONEON-NOSVE-NEXT: add w8, w15, w15 +; NONEON-NOSVE-NEXT: str w9, [sp, #384] +; NONEON-NOSVE-NEXT: add w9, w14, w14 +; NONEON-NOSVE-NEXT: str w8, [sp, #284] +; NONEON-NOSVE-NEXT: add w8, w13, w13 +; NONEON-NOSVE-NEXT: str w9, [sp, #280] +; NONEON-NOSVE-NEXT: add w9, w12, w12 +; NONEON-NOSVE-NEXT: str w8, [sp, #276] +; NONEON-NOSVE-NEXT: add w8, w11, w11 +; NONEON-NOSVE-NEXT: str w9, [sp, #272] +; NONEON-NOSVE-NEXT: add w9, w10, w10 +; NONEON-NOSVE-NEXT: str w8, [sp, #300] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: str w9, [sp, #296] +; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: add w8, w8, w8 +; NONEON-NOSVE-NEXT: str w5, [sp, #364] +; NONEON-NOSVE-NEXT: add w5, w30, w30 +; NONEON-NOSVE-NEXT: add w9, w9, w9 +; NONEON-NOSVE-NEXT: str w6, [sp, #360] +; NONEON-NOSVE-NEXT: add w6, w29, w29 +; NONEON-NOSVE-NEXT: str w5, [sp, #356] +; NONEON-NOSVE-NEXT: ldp q6, q3, [sp, #304] +; NONEON-NOSVE-NEXT: str w6, [sp, #352] +; NONEON-NOSVE-NEXT: ldp q4, q7, 
[sp, #368] +; NONEON-NOSVE-NEXT: str w8, [sp, #292] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #336] +; NONEON-NOSVE-NEXT: str w9, [sp, #288] +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #480] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #272] +; NONEON-NOSVE-NEXT: stp q4, q3, [x1, #32] +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #464] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: stp q7, q6, [x1, #64] +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #448] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: stp q2, q5, [x1, #96] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #432] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #416] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #400] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: add sp, sp, #496 +; NONEON-NOSVE-NEXT: ret %a = load <32 x i64>, ptr %in %b = trunc <32 x i64> %a to <32 x i32> %c = add <32 x i32> %b, %b diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll index 304823c9e6414..323f5f56a2c08 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -14,6 +15,21 @@ define <4 x i8> @shuffle_ext_byone_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: tbl z0.h, { z0.h }, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, 
[sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: strh w8, [sp, #10] +; NONEON-NOSVE-NEXT: ldrh w8, [sp] +; NONEON-NOSVE-NEXT: strh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ldur w8, [sp, #2] +; NONEON-NOSVE-NEXT: ror w8, w8, #16 +; NONEON-NOSVE-NEXT: str w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <4 x i8> %op1, <4 x i8> %op2, <4 x i32> ret <4 x i8> %ret } @@ -28,6 +44,23 @@ define <8 x i8> @shuffle_ext_byone_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: insr z1.b, w8 ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: sturh w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: stur w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <8 x i8> %op1, <8 x i8> %op2, <8 x i32> ret <8 x i8> %ret } @@ -42,6 +75,24 @@ define <16 x i8> @shuffle_ext_byone_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: insr z1.b, w8 ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] +; NONEON-NOSVE-NEXT: strb w8, [sp, #47] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: sturh w8, [sp, #45] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: stur w8, [sp, #41] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: stur x8, [sp, #33] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15] +; NONEON-NOSVE-NEXT: strb w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <16 x i8> %op1, <16 x i8> %op2, <16 x i32> ret <16 x i8> %ret @@ -60,6 +111,39 @@ define void @shuffle_ext_byone_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: insr z3.b, w8 ; CHECK-NEXT: stp q1, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ldr q2, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [sp, #-80]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #14] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #32] +; NONEON-NOSVE-NEXT: strb w8, [sp, #31] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: sturh w8, [sp, #29] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: stur w8, [sp, #25] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: stur x8, [sp, #17] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63] +; NONEON-NOSVE-NEXT: strb w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #62] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: strb w8, [sp, #79] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: sturh w8, [sp, #77] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: stur w8, [sp, #73] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: stur x8, [sp, #65] +; NONEON-NOSVE-NEXT: ldrb w8, [sp, #47] +; NONEON-NOSVE-NEXT: strb w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp 
q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %ret = shufflevector <32 x i8> %op1, <32 x i8> %op2, <32 x i32> @shuffle_ext_byone_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-NEXT: revw z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldp w9, w8, [sp] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <2 x i16> %op1, <2 x i16> %op2, <2 x i32> ret <2 x i16> %ret } @@ -92,6 +186,21 @@ define <4 x i16> @shuffle_ext_byone_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: insr z1.h, w8 ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: stur w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <4 x i16> %op1, <4 x i16> %op2, <4 x i32> ret <4 x i16> %ret } @@ -106,6 +215,22 @@ define <8 x i16> @shuffle_ext_byone_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: insr z1.h, w8 ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: strh w8, [sp, #46] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: stur w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: stur x8, [sp, #34] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: strh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <8 x i16> %op1, <8 x i16> %op2, <8 x i32> ret <8 x i16> %ret } @@ -123,6 +248,35 @@ define void @shuffle_ext_byone_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: insr z3.h, w8 ; CHECK-NEXT: stp q1, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ldr q2, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [sp, #-80]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #32] +; NONEON-NOSVE-NEXT: strh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: stur w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: stur x8, [sp, #18] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62] +; NONEON-NOSVE-NEXT: strh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: strh w8, [sp, #78] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: stur w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: stur x8, [sp, #66] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #46] +; NONEON-NOSVE-NEXT: strh w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %ret = shufflevector <16 x i16> %op1, <16 x i16> %op2, <16 x i32> @shuffle_ext_byone_v2i32(<2 x i32> %op1, 
<2 x i32> %op2) { ; CHECK-NEXT: insr z1.s, w8 ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #12] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <2 x i32> %op1, <2 x i32> %op2, <2 x i32> ret <2 x i32> %ret } @@ -155,6 +320,20 @@ define <4 x i32> @shuffle_ext_byone_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: insr z1.s, w8 ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: str w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: stur x8, [sp, #36] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] +; NONEON-NOSVE-NEXT: str w8, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <4 x i32> %op1, <4 x i32> %op2, <4 x i32> ret <4 x i32> %ret } @@ -172,6 +351,30 @@ define void @shuffle_ext_byone_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: insr z3.s, w8 ; CHECK-NEXT: stp q1, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ldr q2, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [sp, #-80]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #32] +; NONEON-NOSVE-NEXT: str w8, [sp, #28] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: stur x8, [sp, #20] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #56] +; NONEON-NOSVE-NEXT: str w8, [sp, #76] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: str w9, [sp, #16] +; NONEON-NOSVE-NEXT: stur x8, [sp, #68] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: str w8, [sp, #64] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %ret = shufflevector <8 x i32> %op1, <8 x i32> %op2, <8 x i32> @@ -189,6 +392,16 @@ define <2 x i64> @shuffle_ext_byone_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: insr z1.d, x8 ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #8] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <2 x i64> %op1, <2 x i64> %op2, <2 x i32> ret <2 x i64> %ret } @@ -206,6 +419,24 @@ define void @shuffle_ext_byone_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: insr z3.d, x8 ; CHECK-NEXT: stp q1, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ldr q2, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [sp, #-80]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x9, [sp] +; NONEON-NOSVE-NEXT: ldp x11, x10, [sp, #48] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #40] +; NONEON-NOSVE-NEXT: stp x10, x9, [sp, #16] +; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #64] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %ret = shufflevector <4 x i64> %op1, <4 x i64> %op2, <4 x i32> @@ -223,6 +454,21 @@ define <4 x half> @shuffle_ext_byone_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: insr z0.h, h2 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: stur w8, [sp, #26] +; NONEON-NOSVE-NEXT: str h0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <4 x half> %op1, <4 x half> %op2, <4 x i32> ret <4 x half> %ret } @@ -236,6 +482,22 @@ define <8 x half> @shuffle_ext_byone_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: insr z0.h, h2 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] +; NONEON-NOSVE-NEXT: str h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] +; NONEON-NOSVE-NEXT: stur w8, [sp, #42] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: str h0, [sp, #32] +; NONEON-NOSVE-NEXT: stur x8, [sp, #34] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <8 x half> %op1, <8 x half> %op2, <8 x i32> ret <8 x half> %ret } @@ -251,6 +513,35 @@ define void @shuffle_ext_byone_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: insr z3.h, h2 ; CHECK-NEXT: stp q1, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ldr q2, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [sp, #-80]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #32] +; NONEON-NOSVE-NEXT: str h0, [sp, #30] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: stur w8, [sp, #26] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: str h0, [sp, #16] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] +; NONEON-NOSVE-NEXT: stur x8, [sp, #18] +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] +; NONEON-NOSVE-NEXT: str h0, [sp, #78] +; NONEON-NOSVE-NEXT: ldr h0, [sp, #46] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: stur w8, [sp, #74] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: str h0, [sp, #64] +; NONEON-NOSVE-NEXT: stur x8, [sp, #66] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %ret = shufflevector <16 x half> %op1, <16 x half> %op2, <16 x i32> @shuffle_ext_byone_v2f32(<2 x float> %op1, <2 
x float> %op2) ; CHECK-NEXT: insr z0.s, s2 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #32 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #12] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <2 x float> %op1, <2 x float> %op2, <2 x i32> ret <2 x float> %ret } @@ -281,6 +583,20 @@ define <4 x float> @shuffle_ext_byone_v4f32(<4 x float> %op1, <4 x float> %op2) ; CHECK-NEXT: insr z0.s, s2 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: str s0, [sp, #44] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] +; NONEON-NOSVE-NEXT: stur x8, [sp, #36] +; NONEON-NOSVE-NEXT: str s0, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <4 x float> %op1, <4 x float> %op2, <4 x i32> ret <4 x float> %ret } @@ -296,6 +612,30 @@ define void @shuffle_ext_byone_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: insr z3.s, s2 ; CHECK-NEXT: stp q1, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ldr q2, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [sp, #-80]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] +; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #32] +; NONEON-NOSVE-NEXT: ldr x8, [sp] +; NONEON-NOSVE-NEXT: str s0, [sp, #28] +; NONEON-NOSVE-NEXT: ldp s0, s1, [sp, #56] +; NONEON-NOSVE-NEXT: stur x8, [sp, #20] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #48] +; NONEON-NOSVE-NEXT: str s0, [sp, #76] +; NONEON-NOSVE-NEXT: ldr s0, [sp, #44] +; NONEON-NOSVE-NEXT: str s1, [sp, #16] +; NONEON-NOSVE-NEXT: stur x8, [sp, #68] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: str s0, [sp, #64] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %ret = shufflevector <8 x float> %op1, <8 x float> %op2, <8 x i32> @@ -312,6 +652,16 @@ define <2 x double> @shuffle_ext_byone_v2f64(<2 x double> %op1, <2 x double> %op ; CHECK-NEXT: insr z0.d, d2 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #8] +; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <2 x double> %op1, <2 x double> %op2, <2 x i32> ret <2 x double> %ret } @@ -327,6 +677,24 @@ define void @shuffle_ext_byone_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: insr z3.d, d2 ; CHECK-NEXT: stp q1, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ldr q2, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [sp, #-80]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d1, [sp] +; NONEON-NOSVE-NEXT: ldp d3, d2, [sp, #48] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: stp d2, d1, [sp, #16] +; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #64] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %ret = shufflevector <4 x double> %op1, <4 x double> %op2, <4 x i32> @@ -345,6 +713,25 @@ define void @shuffle_ext_byone_reverse(ptr %a, ptr %b) { ; CHECK-NEXT: insr z3.d, d2 ; CHECK-NEXT: stp q1, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_reverse: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #80 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: str q2, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #40] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldr q1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #80 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %ret = shufflevector <4 x double> %op1, <4 x double> %op2, <4 x i32> @@ -359,6 +746,13 @@ define void @shuffle_ext_invalid(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_invalid: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x 
double>, ptr %a %op2 = load <4 x double>, ptr %b %ret = shufflevector <4 x double> %op1, <4 x double> %op2, <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll index 6c9c055605668..67cdde718e391 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -11,6 +12,11 @@ define fp128 @test_streaming_compatible_register_mov(fp128 %q0, fp128 %q1) { ; CHECK: // %bb.0: ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_streaming_compatible_register_mov: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret ret fp128 %q1 } @@ -20,6 +26,11 @@ define double @fp_zero_constant() { ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d0, xzr ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fp_zero_constant: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov d0, xzr +; NONEON-NOSVE-NEXT: ret ret double 0.0 } @@ -29,6 +40,12 @@ define <2 x i64> @fixed_vec_zero_constant() { ; CHECK-NEXT: mov z0.d, #0 // =0x0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fixed_vec_zero_constant: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI2_0 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI2_0] +; NONEON-NOSVE-NEXT: ret ret <2 x i64> zeroinitializer } @@ -38,5 +55,11 @@ define <2 x double> @fixed_vec_fp_zero_constant() { ; CHECK-NEXT: mov z0.d, #0 // =0x0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret 
+; +; NONEON-NOSVE-LABEL: fixed_vec_fp_zero_constant: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI3_0 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI3_0] +; NONEON-NOSVE-NEXT: ret ret <2 x double> } diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll index 61b67755a3544..7934f831a7e62 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+b16b16 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+sme2 -mattr=+b16b16 -verify-machineinstrs < %s | FileCheck %s define @bfclamp( %a, %b, %c){ ; CHECK-LABEL: bfclamp: @@ -11,3 +11,27 @@ define @bfclamp( %a, @llvm.aarch64.sve.fclamp.nxv8bf16(, , ) + +define { , } @test_bfclamp_single_x2_f16( %a, %b, %c, %d){ +; CHECK-LABEL: test_bfclamp_single_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: bfclamp { z0.h, z1.h }, z2.h, z3.h +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16( %a, %b, %c, %d) + ret { , } %res +} + +define { , , , } @test_bfclamp_single_x4_f16( %a, %b, %c, %d, %e, %f){ +; CHECK-LABEL: test_bfclamp_single_x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: bfclamp { z0.h - z3.h }, z4.h, z5.h +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16( 
%a, %b, %c, %d, %e, %f) + ret { , , , } %res +} diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll index ab7cea8dfb778..c9fe89aec8ad9 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll @@ -4725,94 +4725,102 @@ define i32 @full(ptr %p1, i32 noundef %s1, ptr %p2, i32 noundef %s2) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-GI-NEXT: // kill: def $w3 killed $w3 def $x3 -; CHECK-GI-NEXT: sxtw x8, w3 ; CHECK-GI-NEXT: sxtw x9, w1 +; CHECK-GI-NEXT: sxtw x8, w3 ; CHECK-GI-NEXT: ldr d0, [x0] ; CHECK-GI-NEXT: ldr d1, [x2] ; CHECK-GI-NEXT: add x10, x0, x9 ; CHECK-GI-NEXT: add x11, x2, x8 -; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-GI-NEXT: ldr d2, [x10] -; CHECK-GI-NEXT: add x10, x10, x9 -; CHECK-GI-NEXT: add x12, x11, x8 -; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-GI-NEXT: ldr d3, [x11] -; CHECK-GI-NEXT: ldr d4, [x10] -; CHECK-GI-NEXT: ldr d5, [x12] -; CHECK-GI-NEXT: add x10, x10, x9 -; CHECK-GI-NEXT: add x11, x12, x8 -; CHECK-GI-NEXT: ushll v2.8h, v2.8b, #0 -; CHECK-GI-NEXT: ushll v3.8h, v3.8b, #0 -; CHECK-GI-NEXT: ushll v4.8h, v4.8b, #0 -; CHECK-GI-NEXT: ushll v5.8h, v5.8b, #0 -; CHECK-GI-NEXT: uabdl v6.4s, v0.4h, v1.4h -; CHECK-GI-NEXT: uabdl2 v0.4s, v0.8h, v1.8h +; CHECK-GI-NEXT: usubl v0.8h, v0.8b, v1.8b ; CHECK-GI-NEXT: ldr d1, [x10] -; CHECK-GI-NEXT: ldr d7, [x11] +; CHECK-GI-NEXT: ldr d2, [x11] ; CHECK-GI-NEXT: add x10, x10, x9 ; CHECK-GI-NEXT: add x11, x11, x8 -; CHECK-GI-NEXT: uabdl v16.4s, v2.4h, v3.4h -; CHECK-GI-NEXT: uabdl2 v2.4s, v2.8h, v3.8h -; CHECK-GI-NEXT: uabdl v3.4s, v4.4h, v5.4h -; CHECK-GI-NEXT: uabdl2 v4.4s, v4.8h, v5.8h -; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-GI-NEXT: ushll v7.8h, v7.8b, #0 -; CHECK-GI-NEXT: ldr d5, [x10] -; CHECK-GI-NEXT: ldr d17, [x11] +; CHECK-GI-NEXT: usubl v1.8h, v1.8b, v2.8b +; CHECK-GI-NEXT: ldr d3, [x10] +; CHECK-GI-NEXT: ldr d4, [x11] +; CHECK-GI-NEXT: 
sshll v5.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0 +; CHECK-GI-NEXT: add x10, x10, x9 +; CHECK-GI-NEXT: add x11, x11, x8 +; CHECK-GI-NEXT: ldr d2, [x10] ; CHECK-GI-NEXT: add x10, x10, x9 +; CHECK-GI-NEXT: sshll v7.4s, v1.4h, #0 +; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0 +; CHECK-GI-NEXT: ldr d6, [x11] ; CHECK-GI-NEXT: add x11, x11, x8 -; CHECK-GI-NEXT: add v0.4s, v6.4s, v0.4s -; CHECK-GI-NEXT: ushll v5.8h, v5.8b, #0 -; CHECK-GI-NEXT: ushll v17.8h, v17.8b, #0 -; CHECK-GI-NEXT: add v2.4s, v16.4s, v2.4s -; CHECK-GI-NEXT: add v3.4s, v3.4s, v4.4s -; CHECK-GI-NEXT: uabdl v4.4s, v1.4h, v7.4h -; CHECK-GI-NEXT: uabdl2 v1.4s, v1.8h, v7.8h -; CHECK-GI-NEXT: ldr d7, [x10] +; CHECK-GI-NEXT: usubl v3.8h, v3.8b, v4.8b +; CHECK-GI-NEXT: abs v5.4s, v5.4s +; CHECK-GI-NEXT: abs v0.4s, v0.4s +; CHECK-GI-NEXT: ldr d4, [x10] ; CHECK-GI-NEXT: ldr d16, [x11] +; CHECK-GI-NEXT: abs v7.4s, v7.4s +; CHECK-GI-NEXT: abs v1.4s, v1.4s ; CHECK-GI-NEXT: add x10, x10, x9 ; CHECK-GI-NEXT: add x11, x11, x8 -; CHECK-GI-NEXT: ldr d18, [x10] -; CHECK-GI-NEXT: ldr d20, [x10, x9] -; CHECK-GI-NEXT: ldr d19, [x11] -; CHECK-GI-NEXT: ldr d21, [x11, x8] -; CHECK-GI-NEXT: uabdl v6.4s, v5.4h, v17.4h -; CHECK-GI-NEXT: ushll v7.8h, v7.8b, #0 -; CHECK-GI-NEXT: ushll v16.8h, v16.8b, #0 -; CHECK-GI-NEXT: uabdl2 v5.4s, v5.8h, v17.8h -; CHECK-GI-NEXT: ushll v17.8h, v18.8b, #0 -; CHECK-GI-NEXT: ushll v18.8h, v19.8b, #0 -; CHECK-GI-NEXT: add v1.4s, v4.4s, v1.4s -; CHECK-GI-NEXT: ushll v4.8h, v20.8b, #0 -; CHECK-GI-NEXT: ushll v19.8h, v21.8b, #0 -; CHECK-GI-NEXT: addv s2, v2.4s +; CHECK-GI-NEXT: usubl v2.8h, v2.8b, v6.8b +; CHECK-GI-NEXT: ldr d6, [x10] +; CHECK-GI-NEXT: ldr d17, [x11] +; CHECK-GI-NEXT: add x10, x10, x9 +; CHECK-GI-NEXT: add x11, x11, x8 +; CHECK-GI-NEXT: usubl v4.8h, v4.8b, v16.8b +; CHECK-GI-NEXT: sshll v16.4s, v3.4h, #0 +; CHECK-GI-NEXT: sshll2 v3.4s, v3.8h, #0 +; CHECK-GI-NEXT: add v0.4s, v5.4s, v0.4s +; CHECK-GI-NEXT: add v1.4s, v7.4s, v1.4s +; CHECK-GI-NEXT: ldr d5, [x10] +; 
CHECK-GI-NEXT: ldr d7, [x11] +; CHECK-GI-NEXT: sshll v18.4s, v2.4h, #0 +; CHECK-GI-NEXT: sshll2 v2.4s, v2.8h, #0 +; CHECK-GI-NEXT: usubl v6.8h, v6.8b, v17.8b +; CHECK-GI-NEXT: ldr d17, [x11, x8] +; CHECK-GI-NEXT: sshll v19.4s, v4.4h, #0 +; CHECK-GI-NEXT: usubl v5.8h, v5.8b, v7.8b +; CHECK-GI-NEXT: ldr d7, [x10, x9] +; CHECK-GI-NEXT: sshll2 v4.4s, v4.8h, #0 +; CHECK-GI-NEXT: abs v16.4s, v16.4s +; CHECK-GI-NEXT: abs v3.4s, v3.4s +; CHECK-GI-NEXT: abs v18.4s, v18.4s +; CHECK-GI-NEXT: abs v2.4s, v2.4s +; CHECK-GI-NEXT: usubl v7.8h, v7.8b, v17.8b +; CHECK-GI-NEXT: sshll v17.4s, v6.4h, #0 +; CHECK-GI-NEXT: sshll2 v6.4s, v6.8h, #0 +; CHECK-GI-NEXT: abs v19.4s, v19.4s +; CHECK-GI-NEXT: abs v4.4s, v4.4s +; CHECK-GI-NEXT: add v3.4s, v16.4s, v3.4s +; CHECK-GI-NEXT: sshll v16.4s, v5.4h, #0 +; CHECK-GI-NEXT: sshll2 v5.4s, v5.8h, #0 +; CHECK-GI-NEXT: add v2.4s, v18.4s, v2.4s +; CHECK-GI-NEXT: abs v17.4s, v17.4s +; CHECK-GI-NEXT: addv s1, v1.4s +; CHECK-GI-NEXT: abs v6.4s, v6.4s ; CHECK-GI-NEXT: addv s0, v0.4s +; CHECK-GI-NEXT: add v4.4s, v19.4s, v4.4s ; CHECK-GI-NEXT: addv s3, v3.4s -; CHECK-GI-NEXT: uabdl v20.4s, v7.4h, v16.4h -; CHECK-GI-NEXT: uabdl2 v7.4s, v7.8h, v16.8h -; CHECK-GI-NEXT: add v5.4s, v6.4s, v5.4s -; CHECK-GI-NEXT: uabdl v6.4s, v17.4h, v18.4h -; CHECK-GI-NEXT: uabdl2 v16.4s, v17.8h, v18.8h -; CHECK-GI-NEXT: uabdl v17.4s, v4.4h, v19.4h -; CHECK-GI-NEXT: uabdl2 v4.4s, v4.8h, v19.8h -; CHECK-GI-NEXT: fmov w8, s2 -; CHECK-GI-NEXT: addv s1, v1.4s +; CHECK-GI-NEXT: sshll v18.4s, v7.4h, #0 +; CHECK-GI-NEXT: sshll2 v7.4s, v7.8h, #0 +; CHECK-GI-NEXT: abs v16.4s, v16.4s +; CHECK-GI-NEXT: abs v5.4s, v5.4s +; CHECK-GI-NEXT: fmov w8, s1 +; CHECK-GI-NEXT: add v6.4s, v17.4s, v6.4s +; CHECK-GI-NEXT: addv s2, v2.4s ; CHECK-GI-NEXT: fmov w9, s0 +; CHECK-GI-NEXT: addv s4, v4.4s ; CHECK-GI-NEXT: fmov w10, s3 -; CHECK-GI-NEXT: add v7.4s, v20.4s, v7.4s -; CHECK-GI-NEXT: add v0.4s, v17.4s, v4.4s -; CHECK-GI-NEXT: addv s4, v5.4s -; CHECK-GI-NEXT: add v2.4s, v6.4s, v16.4s +; 
CHECK-GI-NEXT: abs v18.4s, v18.4s +; CHECK-GI-NEXT: abs v7.4s, v7.4s +; CHECK-GI-NEXT: add v1.4s, v16.4s, v5.4s ; CHECK-GI-NEXT: add w8, w8, w9 -; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: addv s3, v6.4s +; CHECK-GI-NEXT: fmov w9, s2 ; CHECK-GI-NEXT: add w8, w10, w8 -; CHECK-GI-NEXT: addv s3, v7.4s -; CHECK-GI-NEXT: addv s1, v2.4s -; CHECK-GI-NEXT: addv s0, v0.4s -; CHECK-GI-NEXT: add w8, w9, w8 -; CHECK-GI-NEXT: fmov w9, s4 +; CHECK-GI-NEXT: fmov w10, s4 +; CHECK-GI-NEXT: add v0.4s, v18.4s, v7.4s +; CHECK-GI-NEXT: addv s1, v1.4s ; CHECK-GI-NEXT: add w8, w9, w8 ; CHECK-GI-NEXT: fmov w9, s3 +; CHECK-GI-NEXT: add w8, w10, w8 +; CHECK-GI-NEXT: addv s0, v0.4s ; CHECK-GI-NEXT: add w8, w9, w8 ; CHECK-GI-NEXT: fmov w9, s1 ; CHECK-GI-NEXT: add w8, w9, w8 diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll index 50423c59eabe9..526d5c946ec7f 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll @@ -108,7 +108,7 @@ define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %pt } ; no-op -; HSA-LABEl: {{^}}use_constant_to_flat_addrspacecast: +; HSA-LABEL: {{^}}use_constant_to_flat_addrspacecast: ; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]] ; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]] ; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]] @@ -119,7 +119,7 @@ define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) % ret void } -; HSA-LABEl: {{^}}use_constant_to_global_addrspacecast: +; HSA-LABEL: {{^}}use_constant_to_global_addrspacecast: ; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]] ; CI-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]] ; CI-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]] diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll index 9d4f9434aa314..1a0fda3d54d3f 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll +++ 
b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll @@ -142,7 +142,8 @@ attributes #0 = { nounwind } ; GCN: amdpal.pipelines: ; GCN-NEXT: - .registers: -; GCN-NEXT: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf01ca{{$}} +; SDAG-NEXT: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf01ca{{$}} +; GISEL-NEXT: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf01cb{{$}} ; GCN-NEXT: '0x2e13 (COMPUTE_PGM_RSRC2)': 0x8001{{$}} ; GCN-NEXT: .shader_functions: ; GCN-NEXT: dynamic_stack: @@ -156,10 +157,10 @@ attributes #0 = { nounwind } ; GCN-NEXT: .backend_stack_size: 0x10{{$}} ; GCN-NEXT: .lds_size: 0{{$}} ; SDAG-NEXT: .sgpr_count: 0x25{{$}} -; GISEL-NEXT: .sgpr_count: 0x26{{$}} +; GISEL-NEXT: .sgpr_count: 0x27{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} ; SDAG-NEXT: .vgpr_count: 0x3{{$}} -; GISEL-NEXT: .vgpr_count: 0x4{{$}} +; GISEL-NEXT: .vgpr_count: 0x5{{$}} ; GCN-NEXT: multiple_stack: ; GCN-NEXT: .backend_stack_size: 0x24{{$}} ; GCN-NEXT: .lds_size: 0{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir index 29621a0477418..1151bde02ef62 100644 --- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir +++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir @@ -4,7 +4,7 @@ --- -# GCN-label: name: vop3 +# GCN-LABEL: name: vop3 # GCN: %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64_dpp %3, %0, %1, %5, 1, 1, 15, 15, 1, implicit $exec # GCN: %8:vgpr_32 = V_CVT_PK_U8_F32_e64_dpp %3, 4, %0, 2, %2, 2, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec # GCN: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec @@ -37,7 +37,7 @@ body: | ... 
--- -# GCN-label: name: vop3_sgpr_src1 +# GCN-LABEL: name: vop3_sgpr_src1 # GCN: %6:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %1, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec # GFX1100: %8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit $mode, implicit $exec # GFX1150: %8:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %1, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec @@ -81,7 +81,7 @@ body: | --- # Regression test for src_modifiers on base u16 opcode -# GCN-label: name: vop3_u16 +# GCN-LABEL: name: vop3_u16 # GCN: %5:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 0, %1, 0, %3, 0, 0, 1, 15, 15, 1, implicit $exec # GCN: %7:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 1, %5, 2, %5, 0, 0, 1, 15, 15, 1, implicit $exec # GCN: %9:vgpr_32 = V_ADD_NC_U16_e64 4, %8, 8, %7, 0, 0, implicit $exec @@ -205,7 +205,7 @@ body: | ... # do not combine, dpp arg used twice -# GCN-label: name: dpp_arg_twice +# GCN-LABEL: name: dpp_arg_twice # GCN: %4:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %3, 2, %3, 1, 2, implicit $mode, implicit $exec # GCN: %6:vgpr_32 = V_FMA_F32_e64 2, %5, 2, %1, 2, %5, 1, 2, implicit $mode, implicit $exec # GCN: %8:vgpr_32 = V_FMA_F32_e64 2, %7, 2, %7, 2, %1, 1, 2, implicit $mode, implicit $exec @@ -231,7 +231,7 @@ body: | ... 
# when the dpp source isn't a src0 operand the operation should be commuted if possible -# GCN-label: name: dpp_commute_e64 +# GCN-LABEL: name: dpp_commute_e64 # GCN: %4:vgpr_32 = V_MUL_U32_U24_e64_dpp %1, %0, %1, 1, 1, 14, 15, 0, implicit $exec # GCN: %7:vgpr_32 = V_FMA_F32_e64_dpp %5, 2, %0, 1, %1, 2, %1, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec # GCN: %10:vgpr_32 = V_SUBREV_U32_e64_dpp %1, %0, %1, 1, 1, 14, 15, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll index 9690e126dfcfc..3ec36f03a48aa 100644 --- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll +++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll @@ -3249,3 +3249,209 @@ define double @v_fmaximum3_f64_const1_const2(double %a) { %max1 = call double @llvm.maximum.f64(double %max0, double 16.0) ret double %max1 } + +define <2 x float> @v_no_fmaximum3_f32__multi_use(float %a, float %b, float %c) { +; GFX12-LABEL: v_no_fmaximum3_f32__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_maximum_f32 v0, v0, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_maximum_f32 v1, v0, v2 +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_no_fmaximum3_f32__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_max_f32_e32 v3, v0, v1 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX9-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] + %max0 = call float @llvm.maximum.f32(float %a, float %b) + %max1 = call float @llvm.maximum.f32(float %max0, float %c) + %insert.0 = insertelement <2 x float> poison, float %max0, i32 0 + %insert.1 = insertelement <2 x 
float> %insert.0, float %max1, i32 1 + ret <2 x float> %insert.1 +} + +define amdgpu_ps <2 x i32> @s_no_fmaximum3_f32__multi_use(float inreg %a, float inreg %b, float inreg %c) { +; GFX12-LABEL: s_no_fmaximum3_f32__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_maximum_f32 s0, s0, s1 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GFX12-NEXT: s_maximum_f32 s1, s0, s2 +; GFX12-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_no_fmaximum3_f32__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: v_max_f32_e32 v1, s0, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX9-NEXT: v_max_f32_e32 v1, s2, v0 +; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: ; return to shader part epilog + %max0 = call float @llvm.maximum.f32(float %a, float %b) + %max1 = call float @llvm.maximum.f32(float %max0, float %c) + %cast0 = bitcast float %max0 to i32 + %cast1 = bitcast float %max1 to i32 + %readfirstlane0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast0) + %readfirstlane1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast1) + %insert.0 = insertelement <2 x i32> poison, i32 %readfirstlane0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %readfirstlane1, i32 1 + ret <2 x i32> %insert.1 +} + +define <2 x half> @v_no_fmaximum3_f16__multi_use(half %a, half %b, half %c) { +; GFX12-LABEL: v_no_fmaximum3_f16__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_maximum_f16 v0, v0, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_maximum_f16 v1, v0, v2 +; GFX12-NEXT: v_pack_b32_f16 v0, v0, v1 +; 
GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_no_fmaximum3_f16__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_max_f16_e32 v3, v0, v1 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX9-NEXT: v_max_f16_e32 v1, v0, v2 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %max0 = call half @llvm.maximum.f16(half %a, half %b) + %max1 = call half @llvm.maximum.f16(half %max0, half %c) + %insert.0 = insertelement <2 x half> poison, half %max0, i32 0 + %insert.1 = insertelement <2 x half> %insert.0, half %max1, i32 1 + ret <2 x half> %insert.1 +} + +define amdgpu_ps <2 x i32> @s_no_fmaximum3_f16__multi_use(half inreg %a, half inreg %b, half inreg %c) { +; GFX12-LABEL: s_no_fmaximum3_f16__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_maximum_f16 s0, s0, s1 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) +; GFX12-NEXT: s_maximum_f16 s1, s0, s2 +; GFX12-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX12-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX12-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_no_fmaximum3_f16__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: v_max_f16_e32 v1, s0, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX9-NEXT: v_max_f16_e32 v1, s2, v0 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s2, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: ; return to shader part epilog + %max0 = call half @llvm.maximum.f16(half %a, half %b) + %max1 = call half @llvm.maximum.f16(half %max0, half 
%c) + %cast0 = bitcast half %max0 to i16 + %cast1 = bitcast half %max1 to i16 + %ext0 = zext i16 %cast0 to i32 + %ext1 = zext i16 %cast1 to i32 + %readfirstlane0 = call i32 @llvm.amdgcn.readfirstlane(i32 %ext0) + %readfirstlane1 = call i32 @llvm.amdgcn.readfirstlane(i32 %ext1) + %insert.0 = insertelement <2 x i32> poison, i32 %readfirstlane0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %readfirstlane1, i32 1 + ret <2 x i32> %insert.1 +} + +define <4 x half> @v_no_fmaximum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b, <2 x half> %c) { +; GFX12-LABEL: v_no_fmaximum3_v2f16__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_pk_maximum_f16 v1, v0, v2 +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_no_fmaximum3_v2f16__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_max_f16 v3, v0, v1 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v1, v5, s4 +; GFX9-NEXT: v_pk_max_f16 v3, v0, v2 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-NEXT: v_perm_b32 v1, v1, v5, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %max0 = call <2 x half> @llvm.maximum.f16(<2 x half> %a, <2 x half> %b) + %max1 = call <2 x half> @llvm.maximum.f16(<2 x half> 
%max0, <2 x half> %c) + %concat = shufflevector <2 x half> %max0, <2 x half> %max1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x half> %concat +} + +define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double %c) { +; GFX12-LABEL: v_no_fmaximum3_f64__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_maximum_f64 v[2:3], v[0:1], v[4:5] +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_no_fmaximum3_f64__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] + %max0 = call double @llvm.maximum.f64(double %a, double %b) + %max1 = call double @llvm.maximum.f64(double %max0, double %c) + %insert.0 = insertelement <2 x double> poison, double %max0, i32 0 + %insert.1 = insertelement <2 x double> %insert.0, double %max1, i32 1 + ret <2 x double> %insert.1 +} diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll index 7481fff251d89..0e0b73b88d2dc 100644 --- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll +++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll @@ -3249,3 +3249,209 @@ define double @v_fminimum3_f64_const1_const2(double %a) { %max1 = call double @llvm.minimum.f64(double %max0, double 16.0) ret double %max1 } + +define <2 x float> @v_no_fminimum3_f32__multi_use(float %a, float %b, float %c) { +;
GFX12-LABEL: v_no_fminimum3_f32__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_minimum_f32 v0, v0, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_minimum_f32 v1, v0, v2 +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_no_fminimum3_f32__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_min_f32_e32 v3, v0, v1 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX9-NEXT: v_min_f32_e32 v1, v0, v2 +; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] + %max0 = call float @llvm.minimum.f32(float %a, float %b) + %max1 = call float @llvm.minimum.f32(float %max0, float %c) + %insert.0 = insertelement <2 x float> poison, float %max0, i32 0 + %insert.1 = insertelement <2 x float> %insert.0, float %max1, i32 1 + ret <2 x float> %insert.1 +} + +define amdgpu_ps <2 x i32> @s_no_fminimum3_f32__multi_use(float inreg %a, float inreg %b, float inreg %c) { +; GFX12-LABEL: s_no_fminimum3_f32__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_minimum_f32 s0, s0, s1 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GFX12-NEXT: s_minimum_f32 s1, s0, s2 +; GFX12-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_no_fminimum3_f32__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: v_min_f32_e32 v1, s0, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX9-NEXT: v_min_f32_e32 v1, s2, v0 +; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: ; 
return to shader part epilog + %max0 = call float @llvm.minimum.f32(float %a, float %b) + %max1 = call float @llvm.minimum.f32(float %max0, float %c) + %cast0 = bitcast float %max0 to i32 + %cast1 = bitcast float %max1 to i32 + %readfirstlane0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast0) + %readfirstlane1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast1) + %insert.0 = insertelement <2 x i32> poison, i32 %readfirstlane0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %readfirstlane1, i32 1 + ret <2 x i32> %insert.1 +} + +define <2 x half> @v_no_fminimum3_f16__multi_use(half %a, half %b, half %c) { +; GFX12-LABEL: v_no_fminimum3_f16__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_minimum_f16 v0, v0, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_minimum_f16 v1, v0, v2 +; GFX12-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_no_fminimum3_f16__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_min_f16_e32 v3, v0, v1 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX9-NEXT: v_min_f16_e32 v1, v0, v2 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %max0 = call half @llvm.minimum.f16(half %a, half %b) + %max1 = call half @llvm.minimum.f16(half %max0, half %c) + %insert.0 = insertelement <2 x half> poison, half %max0, i32 0 + %insert.1 = insertelement <2 x half> %insert.0, half %max1, i32 1 + ret <2 x half> %insert.1 +} + +define amdgpu_ps <2 x i32> @s_no_fminimum3_f16__multi_use(half inreg %a, half inreg %b, half inreg %c) { +; GFX12-LABEL: 
s_no_fminimum3_f16__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_minimum_f16 s0, s0, s1 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) +; GFX12-NEXT: s_minimum_f16 s1, s0, s2 +; GFX12-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX12-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX12-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_no_fminimum3_f16__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: v_min_f16_e32 v1, s0, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX9-NEXT: v_min_f16_e32 v1, s2, v0 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s2, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: ; return to shader part epilog + %max0 = call half @llvm.minimum.f16(half %a, half %b) + %max1 = call half @llvm.minimum.f16(half %max0, half %c) + %cast0 = bitcast half %max0 to i16 + %cast1 = bitcast half %max1 to i16 + %ext0 = zext i16 %cast0 to i32 + %ext1 = zext i16 %cast1 to i32 + %readfirstlane0 = call i32 @llvm.amdgcn.readfirstlane(i32 %ext0) + %readfirstlane1 = call i32 @llvm.amdgcn.readfirstlane(i32 %ext1) + %insert.0 = insertelement <2 x i32> poison, i32 %readfirstlane0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %readfirstlane1, i32 1 + ret <2 x i32> %insert.1 +} + +define <4 x half> @v_no_fminimum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b, <2 x half> %c) { +; GFX12-LABEL: v_no_fminimum3_v2f16__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_pk_minimum_f16 v1, v0, 
v2 +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_no_fminimum3_v2f16__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_min_f16 v3, v0, v1 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v1, v5, s4 +; GFX9-NEXT: v_pk_min_f16 v3, v0, v2 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-NEXT: v_perm_b32 v1, v1, v5, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %max0 = call <2 x half> @llvm.minimum.f16(<2 x half> %a, <2 x half> %b) + %max1 = call <2 x half> @llvm.minimum.f16(<2 x half> %max0, <2 x half> %c) + %concat = shufflevector <2 x half> %max0, <2 x half> %max1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x half> %concat +} + +define <2 x double> @v_no_fminimum3_f64__multi_use(double %a, double %b, double %c) { +; GFX12-LABEL: v_no_fminimum3_f64__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_minimum_f64 v[2:3], v[0:1], v[4:5] +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_no_fminimum3_f64__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 +; GFX9-NEXT:
v_cndmask_b32_e32 v1, v7, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] + %max0 = call double @llvm.minimum.f64(double %a, double %b) + %max1 = call double @llvm.minimum.f64(double %max0, double %c) + %insert.0 = insertelement <2 x double> poison, double %max0, i32 0 + %insert.1 = insertelement <2 x double> %insert.0, double %max1, i32 1 + ret <2 x double> %insert.1 +} diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll index 9547f08d3eba6..1429251fc6421 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll @@ -1,17 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-GISEL %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s +; RUN: llc 
-mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s define amdgpu_kernel void @workgroup_ids_kernel() { ; GFX9-LABEL: workgroup_ids_kernel: ; GFX9: ; %bb.0: ; %.entry -; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 ; GFX9-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -72,27 +72,20 @@ define amdgpu_kernel void @workgroup_ids_kernel() { define amdgpu_kernel void @caller() { ; GFX9-SDAG-LABEL: caller: ; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-SDAG-NEXT: s_mov_b32 s38, -1 -; GFX9-SDAG-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-SDAG-NEXT: s_add_u32 s36, s36, s7 -; GFX9-SDAG-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-SDAG-NEXT: s_add_u32 s8, s2, 36 -; GFX9-SDAG-NEXT: s_addc_u32 s9, s3, 0 -; GFX9-SDAG-NEXT: s_getpc_b64 s[2:3] -; GFX9-SDAG-NEXT: s_add_u32 s2, s2, callee@gotpcrel32@lo+4 -; GFX9-SDAG-NEXT: 
s_addc_u32 s3, s3, callee@gotpcrel32@hi+12 -; GFX9-SDAG-NEXT: s_load_dwordx2 s[14:15], s[2:3], 0x0 -; GFX9-SDAG-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX9-SDAG-NEXT: s_add_u32 flat_scratch_lo, s10, s13 +; GFX9-SDAG-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 +; GFX9-SDAG-NEXT: s_add_u32 s0, s0, s13 +; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, 0 +; GFX9-SDAG-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX9-SDAG-NEXT: s_getpc_b64 s[8:9] +; GFX9-SDAG-NEXT: s_add_u32 s8, s8, callee@gotpcrel32@lo+4 +; GFX9-SDAG-NEXT: s_addc_u32 s9, s9, callee@gotpcrel32@hi+12 +; GFX9-SDAG-NEXT: s_load_dwordx2 s[14:15], s[8:9], 0x0 ; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-SDAG-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-SDAG-NEXT: s_mov_b32 s12, s6 -; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-SDAG-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s12 ; GFX9-SDAG-NEXT: s_mov_b32 s32, 0 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-SDAG-NEXT: s_swappc_b64 s[30:31], s[14:15] @@ -100,27 +93,20 @@ define amdgpu_kernel void @caller() { ; ; GFX9-GISEL-LABEL: caller: ; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-GISEL-NEXT: s_mov_b32 s38, -1 -; GFX9-GISEL-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-GISEL-NEXT: s_add_u32 s36, s36, s7 -; GFX9-GISEL-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-GISEL-NEXT: s_add_u32 s8, s2, 36 -; GFX9-GISEL-NEXT: s_addc_u32 s9, s3, 0 -; GFX9-GISEL-NEXT: s_mov_b64 s[10:11], s[4:5] -; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX9-GISEL-NEXT: s_getpc_b64 s[0:1] -; GFX9-GISEL-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4 -; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12 -; GFX9-GISEL-NEXT: s_load_dwordx2 s[14:15], s[0:1], 0x0 +; GFX9-GISEL-NEXT: s_add_u32 flat_scratch_lo, s10, s13 +; 
GFX9-GISEL-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 +; GFX9-GISEL-NEXT: s_add_u32 s0, s0, s13 +; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0 +; GFX9-GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX9-GISEL-NEXT: s_getpc_b64 s[8:9] +; GFX9-GISEL-NEXT: s_add_u32 s8, s8, callee@gotpcrel32@lo+4 +; GFX9-GISEL-NEXT: s_addc_u32 s9, s9, callee@gotpcrel32@hi+12 +; GFX9-GISEL-NEXT: s_load_dwordx2 s[14:15], s[8:9], 0x0 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2 -; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-GISEL-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-GISEL-NEXT: s_mov_b32 s12, s6 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s12 +; GFX9-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX9-GISEL-NEXT: s_mov_b32 s32, 0 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[14:15] @@ -128,81 +114,61 @@ define amdgpu_kernel void @caller() { ; ; GFX9ARCH-SDAG-LABEL: caller: ; GFX9ARCH-SDAG: ; %bb.0: -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s38, -1 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9ARCH-SDAG-NEXT: s_add_u32 s36, s36, s6 -; GFX9ARCH-SDAG-NEXT: s_addc_u32 s37, s37, 0 -; GFX9ARCH-SDAG-NEXT: s_add_u32 s8, s2, 36 -; GFX9ARCH-SDAG-NEXT: s_addc_u32 s9, s3, 0 -; GFX9ARCH-SDAG-NEXT: s_getpc_b64 s[2:3] -; GFX9ARCH-SDAG-NEXT: s_add_u32 s2, s2, callee@gotpcrel32@lo+4 -; GFX9ARCH-SDAG-NEXT: s_addc_u32 s3, s3, callee@gotpcrel32@hi+12 -; GFX9ARCH-SDAG-NEXT: s_load_dwordx2 s[6:7], s[2:3], 0x0 -; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX9ARCH-SDAG-NEXT: s_add_u32 flat_scratch_lo, s10, s12 +; GFX9ARCH-SDAG-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 +; GFX9ARCH-SDAG-NEXT: s_add_u32 s0, s0, s12 +; GFX9ARCH-SDAG-NEXT: s_addc_u32 s1, s1, 0 +; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX9ARCH-SDAG-NEXT: 
s_getpc_b64 s[8:9] +; GFX9ARCH-SDAG-NEXT: s_add_u32 s8, s8, callee@gotpcrel32@lo+4 +; GFX9ARCH-SDAG-NEXT: s_addc_u32 s9, s9, callee@gotpcrel32@hi+12 +; GFX9ARCH-SDAG-NEXT: s_load_dwordx2 s[12:13], s[8:9], 0x0 ; GFX9ARCH-SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX9ARCH-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9ARCH-SDAG-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 ; GFX9ARCH-SDAG-NEXT: s_mov_b32 s32, 0 ; GFX9ARCH-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX9ARCH-SDAG-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX9ARCH-SDAG-NEXT: s_swappc_b64 s[30:31], s[12:13] ; GFX9ARCH-SDAG-NEXT: s_endpgm ; ; GFX9ARCH-GISEL-LABEL: caller: ; GFX9ARCH-GISEL: ; %bb.0: -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s38, -1 -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9ARCH-GISEL-NEXT: s_add_u32 s36, s36, s6 -; GFX9ARCH-GISEL-NEXT: s_addc_u32 s37, s37, 0 -; GFX9ARCH-GISEL-NEXT: s_add_u32 s8, s2, 36 -; GFX9ARCH-GISEL-NEXT: s_addc_u32 s9, s3, 0 -; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[10:11], s[4:5] -; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX9ARCH-GISEL-NEXT: s_getpc_b64 s[0:1] -; GFX9ARCH-GISEL-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4 -; GFX9ARCH-GISEL-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12 -; GFX9ARCH-GISEL-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 +; GFX9ARCH-GISEL-NEXT: s_add_u32 flat_scratch_lo, s10, s12 +; GFX9ARCH-GISEL-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 +; GFX9ARCH-GISEL-NEXT: s_add_u32 s0, s0, s12 +; GFX9ARCH-GISEL-NEXT: s_addc_u32 s1, s1, 0 +; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX9ARCH-GISEL-NEXT: s_getpc_b64 s[8:9] +; GFX9ARCH-GISEL-NEXT: s_add_u32 s8, s8, callee@gotpcrel32@lo+4 +; 
GFX9ARCH-GISEL-NEXT: s_addc_u32 s9, s9, callee@gotpcrel32@hi+12 +; GFX9ARCH-GISEL-NEXT: s_load_dwordx2 s[12:13], s[8:9], 0x0 ; GFX9ARCH-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9ARCH-GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2 -; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9ARCH-GISEL-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 -; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX9ARCH-GISEL-NEXT: s_mov_b32 s32, 0 ; GFX9ARCH-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9ARCH-GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX9ARCH-GISEL-NEXT: s_swappc_b64 s[30:31], s[12:13] ; GFX9ARCH-GISEL-NEXT: s_endpgm ; -; GFX12-SDAG-LABEL: caller: -; GFX12-SDAG: ; %bb.0: -; GFX12-SDAG-NEXT: v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, ttmp9 -; GFX12-SDAG-NEXT: s_mov_b64 s[10:11], s[4:5] -; GFX12-SDAG-NEXT: s_mov_b32 s7, callee@abs32@hi -; GFX12-SDAG-NEXT: s_mov_b32 s6, callee@abs32@lo -; GFX12-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX12-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX12-SDAG-NEXT: s_mov_b32 s32, 0 -; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX12-SDAG-NEXT: s_endpgm -; -; GFX12-GISEL-LABEL: caller: -; GFX12-GISEL: ; %bb.0: -; GFX12-GISEL-NEXT: v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, ttmp9 -; GFX12-GISEL-NEXT: s_mov_b64 s[10:11], s[4:5] -; GFX12-GISEL-NEXT: s_mov_b32 s6, callee@abs32@lo -; GFX12-GISEL-NEXT: s_mov_b32 s7, callee@abs32@hi -; GFX12-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX12-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX12-GISEL-NEXT: s_mov_b32 s32, 0 -; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX12-GISEL-NEXT: s_endpgm +; GFX12-LABEL: caller: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX12-NEXT: s_getpc_b64 s[4:5] +; GFX12-NEXT: s_sext_i32_i16 s5, s5 +; GFX12-NEXT: s_add_co_u32 s4, s4, callee@gotpcrel32@lo+8 +; GFX12-NEXT: s_add_co_ci_u32 s5, s5, callee@gotpcrel32@hi+16 +; GFX12-NEXT: v_dual_mov_b32 v31, v0 :: 
v_dual_mov_b32 v0, ttmp9 +; GFX12-NEXT: s_load_b64 s[6:7], s[4:5], 0x0 +; GFX12-NEXT: s_mov_b64 s[4:5], s[0:1] +; GFX12-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX12-NEXT: s_mov_b32 s32, 0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX12-NEXT: s_endpgm %idx = call i32 @llvm.amdgcn.workgroup.id.x() call void @callee(i32 %idx) #0 ret void diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll index 14fe4e5f48c67..8009f917aef5a 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 
-verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-GISEL %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s @@ -67,62 +67,37 @@ define amdgpu_cs void @_amdgpu_cs_main() { } define amdgpu_cs void @caller() { -; GFX9-LABEL: caller: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s10, -1 -; GFX9-NEXT: s_mov_b32 s11, 0xe00000 -; GFX9-NEXT: s_add_u32 s8, s8, s0 -; GFX9-NEXT: s_addc_u32 s9, s9, 0 -; GFX9-NEXT: s_getpc_b64 s[0:1] -; GFX9-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[8:9] -; GFX9-NEXT: s_mov_b64 s[2:3], s[10:11] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm -; ; GFX9ARCH-SDAG-LABEL: caller: ; GFX9ARCH-SDAG: ; %bb.0: -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s10, -1 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s11, 0xe00000 +; GFX9ARCH-SDAG-NEXT: s_getpc_b64 s[8:9] +; GFX9ARCH-SDAG-NEXT: s_mov_b32 s8, s0 +; GFX9ARCH-SDAG-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10 +; GFX9ARCH-SDAG-NEXT: s_mov_b32 s5, callee@abs32@hi +; GFX9ARCH-SDAG-NEXT: s_mov_b32 s4, callee@abs32@lo +; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9ARCH-SDAG-NEXT: s_mov_b32 s32, 0 +; GFX9ARCH-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX9ARCH-SDAG-NEXT: s_add_u32 s8, s8, s0 ; GFX9ARCH-SDAG-NEXT: s_addc_u32 s9, s9, 0 -; GFX9ARCH-SDAG-NEXT: s_getpc_b64 s[0:1] -; GFX9ARCH-SDAG-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4 -; GFX9ARCH-SDAG-NEXT: 
s_addc_u32 s1, s1, callee@gotpcrel32@hi+12 -; GFX9ARCH-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[0:1], s[8:9] ; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] -; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s32, 0 -; GFX9ARCH-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX9ARCH-SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9ARCH-SDAG-NEXT: s_endpgm ; ; GFX9ARCH-GISEL-LABEL: caller: ; GFX9ARCH-GISEL: ; %bb.0: -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s10, -1 -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s11, 0xe00000 +; GFX9ARCH-GISEL-NEXT: s_getpc_b64 s[8:9] +; GFX9ARCH-GISEL-NEXT: s_mov_b32 s8, s0 +; GFX9ARCH-GISEL-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10 +; GFX9ARCH-GISEL-NEXT: s_mov_b32 s4, callee@abs32@lo +; GFX9ARCH-GISEL-NEXT: s_mov_b32 s5, callee@abs32@hi +; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9ARCH-GISEL-NEXT: s_mov_b32 s32, 0 +; GFX9ARCH-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9ARCH-GISEL-NEXT: s_add_u32 s8, s8, s0 ; GFX9ARCH-GISEL-NEXT: s_addc_u32 s9, s9, 0 -; GFX9ARCH-GISEL-NEXT: s_getpc_b64 s[0:1] -; GFX9ARCH-GISEL-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4 -; GFX9ARCH-GISEL-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12 -; GFX9ARCH-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[0:1], s[8:9] -; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 ; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s32, 0 -; GFX9ARCH-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9ARCH-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9ARCH-GISEL-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll new file mode 100644 index 0000000000000..50a3336a7483c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll @@ -0,0 +1,1757 @@ +; NOTE: Assertions 
have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s + +; Test if fcmp+select patterns form min/max instructions when allowed +; by flags. + +; TODO: Merge with fmin_legacy.ll/fmax_legacy.ll + +define float @v_test_fmin_legacy_ule_f32_safe(float %a, float %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f32_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f32_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f32_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmin_legacy_ule_f32_nnan_flag(float %a, float %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; 
GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule float %a, %b + %val = select nnan i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmin_legacy_ule_f32_nsz_flag(float %a, float %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule float %a, %b + %val = select nsz i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmin_legacy_ule_f32_nnan_nsz_flag(float %a, float %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 +; 
GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule float %a, %b + %val = select nnan nsz i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmax_legacy_uge_f32_safe(float %a, float %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f32_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f32_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f32_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmax_legacy_uge_f32_nnan_flag(float %a, float %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 
s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge float %a, %b + %val = select nnan i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmax_legacy_uge_f32_nsz_flag(float %a, float %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge float %a, %b + %val = select nsz i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmax_legacy_uge_f32_nnan_nsz_flag(float %a, float %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 
s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge float %a, %b + %val = select nnan nsz i1 %cmp, float %a, float %b + ret float %val +} + +define <2 x float> @v_test_fmin_legacy_ule_v2f32_safe(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x float> %a, %b + %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_flag(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 
v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x float> %a, %b + %val = select nnan <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmin_legacy_ule_v2f32_nsz_flag(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: 
s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x float> %a, %b + %val = select nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x float> %a, %b + %val = select nnan nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmax_legacy_uge_v2f32_safe(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: 
v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x float> %a, %b + %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_flag(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: 
v_cmp_nlt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x float> %a, %b + %val = select nnan <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmax_legacy_uge_v2f32_nsz_flag(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x float> %a, %b + %val = select nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x float> %a, %b + %val = select nnan nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define half @v_test_fmin_legacy_ule_f16_safe(half %a, half %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f16_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f16_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f16_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: 
v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule half %a, %b + %val = select i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmin_legacy_ule_f16_nnan_flag(half %a, half %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule half %a, %b + %val = select nnan i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmin_legacy_ule_f16_nsz_flag(half %a, half %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; 
GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule half %a, %b + %val = select nsz i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmin_legacy_ule_f16_nnan_nsz_flag(half %a, half %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule half %a, %b + %val = select nnan nsz i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmax_legacy_uge_f16_safe(half %a, half %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f16_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f16_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f16_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge half %a, %b + %val = select i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmax_legacy_uge_f16_nnan_flag(half %a, half %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge half %a, %b + %val = select nnan i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmax_legacy_uge_f16_nsz_flag(half %a, half %b) { +; 
GFX7-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge half %a, %b + %val = select nsz i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmax_legacy_uge_f16_nnan_nsz_flag(half %a, half %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; 
GFX12-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge half %a, %b + %val = select nnan nsz i1 %cmp, half %a, half %b + ret half %val +} + +define <2 x half> @v_test_fmin_legacy_ule_v2f16_safe(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f16_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: v_perm_b32 v0, v2, v0, 
0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x half> %a, %b + %val = select <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_flag(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: v_perm_b32 v0, 
v2, v0, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x half> %a, %b + %val = select nnan <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmin_legacy_ule_v2f16_nsz_flag(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: 
v_perm_b32 v0, v2, v0, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x half> %a, %b + %val = select nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x half> %a, %b + %val = select nnan nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmax_legacy_uge_v2f16_safe(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: 
v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f16_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x half> %a, %b + %val = select <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_flag(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: 
v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x half> %a, %b + %val = select nnan <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmax_legacy_uge_v2f16_nsz_flag(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; 
GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x half> %a, %b + %val = select nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: 
v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x half> %a, %b + %val = select nnan nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <4 x half> @v_test_fmin_legacy_ule_v4f16_safe(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: 
v_test_fmin_legacy_ule_v4f16_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4 +; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4 +; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6 +; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <4 x half> %a, %b + %val = select <4 x i1> %cmp, <4 
x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_flag(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4 +; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag: +; 
GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4 +; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6 +; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <4 x half> %a, %b + %val = select nnan <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmin_legacy_ule_v4f16_nsz_flag(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, 
v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4 +; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4 +; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6 +; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo 
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <4 x half> %a, %b + %val = select nsz <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX9-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2 +; GFX12-NEXT: v_pk_min_num_f16 v1, v1, 
v3 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <4 x half> %a, %b + %val = select nnan nsz <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmax_legacy_uge_v4f16_safe(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v5, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4 +; GFX9-NEXT: v_perm_b32 v1, v6, 
v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4 +; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6 +; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <4 x half> %a, %b + %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_flag(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: 
v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v5, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4 +; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4 +; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6 +; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo 
+; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <4 x half> %a, %b + %val = select nnan <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmax_legacy_uge_v4f16_nsz_flag(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v5, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; 
GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4 +; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4 +; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6 +; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <4 x half> %a, %b + %val = select nsz <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: 
v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX9-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v2 +; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v3 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <4 x half> %a, %b + %val = select nnan nsz <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define float @v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs(float %arg0, float %arg1) { +; GFX7-LABEL: v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_add_f32_e32 v0, v0, v0 +; GFX7-NEXT: v_add_f32_e32 v1, v1, v1 +; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_f32_e32 v0, v0, v0 +; GFX9-NEXT: 
v_add_f32_e32 v1, v1, v1 +; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_dual_add_f32 v0, v0, v0 :: v_dual_add_f32 v1, v1, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %a = fadd nnan float %arg0, %arg0 + %b = fadd nnan float %arg1, %arg1 + %cmp = fcmp ule float %a, %b + %val = select nsz i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs(float %arg0, float %arg1) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_add_f32_e32 v0, v0, v0 +; GFX7-NEXT: v_add_f32_e32 v1, v1, v1 +; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_f32_e32 v0, v0, v0 +; GFX9-NEXT: v_add_f32_e32 v1, v1, v1 +; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_dual_add_f32 v0, v0, v0 :: v_dual_add_f32 v1, v1, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %a = fadd nnan float %arg0, %arg0 + %b = fadd nnan float %arg1, %arg1 + %cmp = fcmp uge float %a, %b + %val = select nsz i1 %cmp, float %a, 
float %b + ret float %val +} diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll index bfc249e9081d2..340f0cdd5d5d0 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll @@ -245,6 +245,7 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun ; SI-NEXT: {{ $}} ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[PHI2]], %subreg.sub0, killed [[PHI3]], %subreg.sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo + ; SI-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.3: ; SI-NEXT: successors: %bb.4(0x80000000) @@ -261,8 +262,7 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun ; SI-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY6]] + ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]] ; SI-NEXT: $vgpr0 = COPY killed [[PHI5]] ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -282,6 +282,7 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun ; SI-NEXT: {{ $}} ; SI-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY1]], %subreg.sub0, killed [[COPY]], %subreg.sub1 ; SI-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo + ; SI-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.7: ; SI-NEXT: successors: 
%bb.8(0x80000000) @@ -298,8 +299,7 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun ; SI-NEXT: successors: %bb.7(0x40000000), %bb.9(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY9]] + ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY9]] ; SI-NEXT: $vgpr0 = COPY killed [[PHI7]] ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE3]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -367,6 +367,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e ; SI-NEXT: {{ $}} ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[PHI1]], %subreg.sub0, killed [[PHI2]], %subreg.sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo + ; SI-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.3: ; SI-NEXT: successors: %bb.4(0x80000000) @@ -382,8 +383,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e ; SI-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY6]] + ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]] ; SI-NEXT: $vgpr0 = COPY [[COPY4]] ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def 
dead $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -403,6 +403,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e ; SI-NEXT: {{ $}} ; SI-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY1]], %subreg.sub0, killed [[COPY]], %subreg.sub1 ; SI-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo + ; SI-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.7: ; SI-NEXT: successors: %bb.8(0x80000000) @@ -418,8 +419,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e ; SI-NEXT: successors: %bb.7(0x40000000), %bb.9(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY9]] + ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY9]] ; SI-NEXT: $vgpr0 = COPY [[COPY4]] ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE3]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 4f2e63b5f2467..c53fb2f330a79 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -439,3 +439,539 @@ define void @test_old_store_64bit(ptr %p, i64 %v) { store atomic i64 %v, ptr %p seq_cst, align 8 ret void } + +define half @load_atomic_f16__seq_cst(ptr %ptr) { +; ARM-LABEL: load_atomic_f16__seq_cst: +; ARM: @ %bb.0: +; ARM-NEXT: ldrh r0, [r0] +; ARM-NEXT: dmb ish +; ARM-NEXT: bx lr +; +; ARMOPTNONE-LABEL: load_atomic_f16__seq_cst: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT: ldrh r0, [r0] +; ARMOPTNONE-NEXT: dmb ish +; 
ARMOPTNONE-NEXT: bx lr +; +; THUMBTWO-LABEL: load_atomic_f16__seq_cst: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT: ldrh r0, [r0] +; THUMBTWO-NEXT: dmb ish +; THUMBTWO-NEXT: bx lr +; +; THUMBONE-LABEL: load_atomic_f16__seq_cst: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT: push {r7, lr} +; THUMBONE-NEXT: movs r1, #0 +; THUMBONE-NEXT: mov r2, r1 +; THUMBONE-NEXT: bl __sync_val_compare_and_swap_2 +; THUMBONE-NEXT: pop {r7, pc} +; +; ARMV4-LABEL: load_atomic_f16__seq_cst: +; ARMV4: @ %bb.0: +; ARMV4-NEXT: push {r11, lr} +; ARMV4-NEXT: mov r1, #5 +; ARMV4-NEXT: bl __atomic_load_2 +; ARMV4-NEXT: pop {r11, lr} +; ARMV4-NEXT: mov pc, lr +; +; ARMV6-LABEL: load_atomic_f16__seq_cst: +; ARMV6: @ %bb.0: +; ARMV6-NEXT: ldrh r0, [r0] +; ARMV6-NEXT: mov r1, #0 +; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT: bx lr +; +; THUMBM-LABEL: load_atomic_f16__seq_cst: +; THUMBM: @ %bb.0: +; THUMBM-NEXT: ldrh r0, [r0] +; THUMBM-NEXT: dmb sy +; THUMBM-NEXT: bx lr + %val = load atomic half, ptr %ptr seq_cst, align 2 + ret half %val +} + +define bfloat @load_atomic_bf16__seq_cst(ptr %ptr) { +; ARM-LABEL: load_atomic_bf16__seq_cst: +; ARM: @ %bb.0: +; ARM-NEXT: ldrh r0, [r0] +; ARM-NEXT: dmb ish +; ARM-NEXT: bx lr +; +; ARMOPTNONE-LABEL: load_atomic_bf16__seq_cst: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT: ldrh r0, [r0] +; ARMOPTNONE-NEXT: dmb ish +; ARMOPTNONE-NEXT: bx lr +; +; THUMBTWO-LABEL: load_atomic_bf16__seq_cst: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT: ldrh r0, [r0] +; THUMBTWO-NEXT: dmb ish +; THUMBTWO-NEXT: bx lr +; +; THUMBONE-LABEL: load_atomic_bf16__seq_cst: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT: push {r7, lr} +; THUMBONE-NEXT: movs r1, #0 +; THUMBONE-NEXT: mov r2, r1 +; THUMBONE-NEXT: bl __sync_val_compare_and_swap_2 +; THUMBONE-NEXT: pop {r7, pc} +; +; ARMV4-LABEL: load_atomic_bf16__seq_cst: +; ARMV4: @ %bb.0: +; ARMV4-NEXT: push {r11, lr} +; ARMV4-NEXT: mov r1, #5 +; ARMV4-NEXT: bl __atomic_load_2 +; ARMV4-NEXT: pop {r11, lr} +; ARMV4-NEXT: mov pc, lr +; +; ARMV6-LABEL: 
load_atomic_bf16__seq_cst: +; ARMV6: @ %bb.0: +; ARMV6-NEXT: ldrh r0, [r0] +; ARMV6-NEXT: mov r1, #0 +; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT: bx lr +; +; THUMBM-LABEL: load_atomic_bf16__seq_cst: +; THUMBM: @ %bb.0: +; THUMBM-NEXT: ldrh r0, [r0] +; THUMBM-NEXT: dmb sy +; THUMBM-NEXT: bx lr + %val = load atomic bfloat, ptr %ptr seq_cst, align 2 + ret bfloat %val +} + +define float @load_atomic_f32__seq_cst(ptr %ptr) { +; ARM-LABEL: load_atomic_f32__seq_cst: +; ARM: @ %bb.0: +; ARM-NEXT: ldr r0, [r0] +; ARM-NEXT: dmb ish +; ARM-NEXT: bx lr +; +; ARMOPTNONE-LABEL: load_atomic_f32__seq_cst: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT: ldr r0, [r0] +; ARMOPTNONE-NEXT: dmb ish +; ARMOPTNONE-NEXT: vmov s0, r0 +; ARMOPTNONE-NEXT: bx lr +; +; THUMBTWO-LABEL: load_atomic_f32__seq_cst: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT: ldr r0, [r0] +; THUMBTWO-NEXT: dmb ish +; THUMBTWO-NEXT: bx lr +; +; THUMBONE-LABEL: load_atomic_f32__seq_cst: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT: push {r7, lr} +; THUMBONE-NEXT: movs r1, #0 +; THUMBONE-NEXT: mov r2, r1 +; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT: pop {r7, pc} +; +; ARMV4-LABEL: load_atomic_f32__seq_cst: +; ARMV4: @ %bb.0: +; ARMV4-NEXT: push {r11, lr} +; ARMV4-NEXT: mov r1, #5 +; ARMV4-NEXT: bl __atomic_load_4 +; ARMV4-NEXT: pop {r11, lr} +; ARMV4-NEXT: mov pc, lr +; +; ARMV6-LABEL: load_atomic_f32__seq_cst: +; ARMV6: @ %bb.0: +; ARMV6-NEXT: ldr r0, [r0] +; ARMV6-NEXT: mov r1, #0 +; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT: bx lr +; +; THUMBM-LABEL: load_atomic_f32__seq_cst: +; THUMBM: @ %bb.0: +; THUMBM-NEXT: ldr r0, [r0] +; THUMBM-NEXT: dmb sy +; THUMBM-NEXT: bx lr + %val = load atomic float, ptr %ptr seq_cst, align 4 + ret float %val +} + +define double @load_atomic_f64__seq_cst(ptr %ptr) { +; ARM-LABEL: load_atomic_f64__seq_cst: +; ARM: @ %bb.0: +; ARM-NEXT: ldrexd r0, r1, [r0] +; ARM-NEXT: clrex +; ARM-NEXT: dmb ish +; ARM-NEXT: bx lr +; +; ARMOPTNONE-LABEL: 
load_atomic_f64__seq_cst: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT: ldrexd r2, r3, [r0] +; ARMOPTNONE-NEXT: mov r1, r3 +; ARMOPTNONE-NEXT: mov r0, r2 +; ARMOPTNONE-NEXT: clrex +; ARMOPTNONE-NEXT: dmb ish +; ARMOPTNONE-NEXT: vmov d16, r0, r1 +; ARMOPTNONE-NEXT: bx lr +; +; THUMBTWO-LABEL: load_atomic_f64__seq_cst: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT: ldrexd r0, r1, [r0] +; THUMBTWO-NEXT: clrex +; THUMBTWO-NEXT: dmb ish +; THUMBTWO-NEXT: bx lr +; +; THUMBONE-LABEL: load_atomic_f64__seq_cst: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT: push {r7, lr} +; THUMBONE-NEXT: sub sp, #8 +; THUMBONE-NEXT: movs r2, #0 +; THUMBONE-NEXT: str r2, [sp] +; THUMBONE-NEXT: str r2, [sp, #4] +; THUMBONE-NEXT: mov r3, r2 +; THUMBONE-NEXT: bl __sync_val_compare_and_swap_8 +; THUMBONE-NEXT: add sp, #8 +; THUMBONE-NEXT: pop {r7, pc} +; +; ARMV4-LABEL: load_atomic_f64__seq_cst: +; ARMV4: @ %bb.0: +; ARMV4-NEXT: push {r11, lr} +; ARMV4-NEXT: mov r1, #5 +; ARMV4-NEXT: bl __atomic_load_8 +; ARMV4-NEXT: pop {r11, lr} +; ARMV4-NEXT: mov pc, lr +; +; ARMV6-LABEL: load_atomic_f64__seq_cst: +; ARMV6: @ %bb.0: +; ARMV6-NEXT: ldrexd r0, r1, [r0] +; ARMV6-NEXT: mov r2, #0 +; ARMV6-NEXT: mcr p15, #0, r2, c7, c10, #5 +; ARMV6-NEXT: bx lr +; +; THUMBM-LABEL: load_atomic_f64__seq_cst: +; THUMBM: @ %bb.0: +; THUMBM-NEXT: push {r7, lr} +; THUMBM-NEXT: movs r1, #5 +; THUMBM-NEXT: bl __atomic_load_8 +; THUMBM-NEXT: pop {r7, pc} + %val = load atomic double, ptr %ptr seq_cst, align 8 + ret double %val +} + +define void @store_atomic_f16__seq_cst(ptr %ptr, half %val1) { +; ARM-LABEL: store_atomic_f16__seq_cst: +; ARM: @ %bb.0: +; ARM-NEXT: dmb ish +; ARM-NEXT: strh r1, [r0] +; ARM-NEXT: dmb ish +; ARM-NEXT: bx lr +; +; ARMOPTNONE-LABEL: store_atomic_f16__seq_cst: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT: sub sp, sp, #4 +; ARMOPTNONE-NEXT: str r1, [sp] @ 4-byte Spill +; ARMOPTNONE-NEXT: mov r1, r0 +; ARMOPTNONE-NEXT: ldr r0, [sp] @ 4-byte Reload +; ARMOPTNONE-NEXT: vmov s0, r0 +; ARMOPTNONE-NEXT: vmov r0, s0 +; 
ARMOPTNONE-NEXT: dmb ish +; ARMOPTNONE-NEXT: strh r0, [r1] +; ARMOPTNONE-NEXT: dmb ish +; ARMOPTNONE-NEXT: add sp, sp, #4 +; ARMOPTNONE-NEXT: bx lr +; +; THUMBTWO-LABEL: store_atomic_f16__seq_cst: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT: dmb ish +; THUMBTWO-NEXT: strh r1, [r0] +; THUMBTWO-NEXT: dmb ish +; THUMBTWO-NEXT: bx lr +; +; THUMBONE-LABEL: store_atomic_f16__seq_cst: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT: push {r7, lr} +; THUMBONE-NEXT: bl __sync_lock_test_and_set_2 +; THUMBONE-NEXT: pop {r7, pc} +; +; ARMV4-LABEL: store_atomic_f16__seq_cst: +; ARMV4: @ %bb.0: +; ARMV4-NEXT: push {r11, lr} +; ARMV4-NEXT: mov r2, #5 +; ARMV4-NEXT: bl __atomic_store_2 +; ARMV4-NEXT: pop {r11, lr} +; ARMV4-NEXT: mov pc, lr +; +; ARMV6-LABEL: store_atomic_f16__seq_cst: +; ARMV6: @ %bb.0: +; ARMV6-NEXT: mov r2, #0 +; ARMV6-NEXT: mcr p15, #0, r2, c7, c10, #5 +; ARMV6-NEXT: strh r1, [r0] +; ARMV6-NEXT: mcr p15, #0, r2, c7, c10, #5 +; ARMV6-NEXT: bx lr +; +; THUMBM-LABEL: store_atomic_f16__seq_cst: +; THUMBM: @ %bb.0: +; THUMBM-NEXT: dmb sy +; THUMBM-NEXT: strh r1, [r0] +; THUMBM-NEXT: dmb sy +; THUMBM-NEXT: bx lr + store atomic half %val1, ptr %ptr seq_cst, align 2 + ret void +} + +define void @store_atomic_bf16__seq_cst(ptr %ptr, bfloat %val1) { +; ARM-LABEL: store_atomic_bf16__seq_cst: +; ARM: @ %bb.0: +; ARM-NEXT: dmb ish +; ARM-NEXT: strh r1, [r0] +; ARM-NEXT: dmb ish +; ARM-NEXT: bx lr +; +; ARMOPTNONE-LABEL: store_atomic_bf16__seq_cst: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT: sub sp, sp, #4 +; ARMOPTNONE-NEXT: str r1, [sp] @ 4-byte Spill +; ARMOPTNONE-NEXT: mov r1, r0 +; ARMOPTNONE-NEXT: ldr r0, [sp] @ 4-byte Reload +; ARMOPTNONE-NEXT: vmov s0, r0 +; ARMOPTNONE-NEXT: vmov r0, s0 +; ARMOPTNONE-NEXT: dmb ish +; ARMOPTNONE-NEXT: strh r0, [r1] +; ARMOPTNONE-NEXT: dmb ish +; ARMOPTNONE-NEXT: add sp, sp, #4 +; ARMOPTNONE-NEXT: bx lr +; +; THUMBTWO-LABEL: store_atomic_bf16__seq_cst: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT: dmb ish +; THUMBTWO-NEXT: strh r1, [r0] +; THUMBTWO-NEXT: 
dmb ish +; THUMBTWO-NEXT: bx lr +; +; THUMBONE-LABEL: store_atomic_bf16__seq_cst: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT: push {r7, lr} +; THUMBONE-NEXT: bl __sync_lock_test_and_set_2 +; THUMBONE-NEXT: pop {r7, pc} +; +; ARMV4-LABEL: store_atomic_bf16__seq_cst: +; ARMV4: @ %bb.0: +; ARMV4-NEXT: push {r11, lr} +; ARMV4-NEXT: mov r2, #5 +; ARMV4-NEXT: bl __atomic_store_2 +; ARMV4-NEXT: pop {r11, lr} +; ARMV4-NEXT: mov pc, lr +; +; ARMV6-LABEL: store_atomic_bf16__seq_cst: +; ARMV6: @ %bb.0: +; ARMV6-NEXT: mov r2, #0 +; ARMV6-NEXT: mcr p15, #0, r2, c7, c10, #5 +; ARMV6-NEXT: strh r1, [r0] +; ARMV6-NEXT: mcr p15, #0, r2, c7, c10, #5 +; ARMV6-NEXT: bx lr +; +; THUMBM-LABEL: store_atomic_bf16__seq_cst: +; THUMBM: @ %bb.0: +; THUMBM-NEXT: dmb sy +; THUMBM-NEXT: strh r1, [r0] +; THUMBM-NEXT: dmb sy +; THUMBM-NEXT: bx lr + store atomic bfloat %val1, ptr %ptr seq_cst, align 2 + ret void +} + +define void @store_atomic_f32__seq_cst(ptr %ptr, float %val1) { +; ARM-LABEL: store_atomic_f32__seq_cst: +; ARM: @ %bb.0: +; ARM-NEXT: dmb ish +; ARM-NEXT: str r1, [r0] +; ARM-NEXT: dmb ish +; ARM-NEXT: bx lr +; +; ARMOPTNONE-LABEL: store_atomic_f32__seq_cst: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT: sub sp, sp, #4 +; ARMOPTNONE-NEXT: str r1, [sp] @ 4-byte Spill +; ARMOPTNONE-NEXT: mov r1, r0 +; ARMOPTNONE-NEXT: ldr r0, [sp] @ 4-byte Reload +; ARMOPTNONE-NEXT: vmov s0, r0 +; ARMOPTNONE-NEXT: vmov r0, s0 +; ARMOPTNONE-NEXT: dmb ish +; ARMOPTNONE-NEXT: str r0, [r1] +; ARMOPTNONE-NEXT: dmb ish +; ARMOPTNONE-NEXT: add sp, sp, #4 +; ARMOPTNONE-NEXT: bx lr +; +; THUMBTWO-LABEL: store_atomic_f32__seq_cst: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT: dmb ish +; THUMBTWO-NEXT: str r1, [r0] +; THUMBTWO-NEXT: dmb ish +; THUMBTWO-NEXT: bx lr +; +; THUMBONE-LABEL: store_atomic_f32__seq_cst: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT: push {r7, lr} +; THUMBONE-NEXT: bl __sync_lock_test_and_set_4 +; THUMBONE-NEXT: pop {r7, pc} +; +; ARMV4-LABEL: store_atomic_f32__seq_cst: +; ARMV4: @ %bb.0: +; ARMV4-NEXT: 
push {r11, lr} +; ARMV4-NEXT: mov r2, #5 +; ARMV4-NEXT: bl __atomic_store_4 +; ARMV4-NEXT: pop {r11, lr} +; ARMV4-NEXT: mov pc, lr +; +; ARMV6-LABEL: store_atomic_f32__seq_cst: +; ARMV6: @ %bb.0: +; ARMV6-NEXT: mov r2, #0 +; ARMV6-NEXT: mcr p15, #0, r2, c7, c10, #5 +; ARMV6-NEXT: str r1, [r0] +; ARMV6-NEXT: mcr p15, #0, r2, c7, c10, #5 +; ARMV6-NEXT: bx lr +; +; THUMBM-LABEL: store_atomic_f32__seq_cst: +; THUMBM: @ %bb.0: +; THUMBM-NEXT: dmb sy +; THUMBM-NEXT: str r1, [r0] +; THUMBM-NEXT: dmb sy +; THUMBM-NEXT: bx lr + store atomic float %val1, ptr %ptr seq_cst, align 4 + ret void +} + +define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { +; ARM-LABEL: store_atomic_f64__seq_cst: +; ARM: @ %bb.0: +; ARM-NEXT: push {r4, r5, lr} +; ARM-NEXT: mov r3, r2 +; ARM-NEXT: dmb ish +; ARM-NEXT: mov r2, r1 +; ARM-NEXT: LBB13_1: @ %atomicrmw.start +; ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-NEXT: ldrexd r4, r5, [r0] +; ARM-NEXT: strexd r1, r2, r3, [r0] +; ARM-NEXT: cmp r1, #0 +; ARM-NEXT: bne LBB13_1 +; ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; ARM-NEXT: dmb ish +; ARM-NEXT: pop {r4, r5, pc} +; +; ARMOPTNONE-LABEL: store_atomic_f64__seq_cst: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT: push {r4, r5, r7, lr} +; ARMOPTNONE-NEXT: add r7, sp, #8 +; ARMOPTNONE-NEXT: push {r8, r10, r11} +; ARMOPTNONE-NEXT: sub sp, sp, #20 +; ARMOPTNONE-NEXT: str r0, [sp] @ 4-byte Spill +; ARMOPTNONE-NEXT: vmov d16, r1, r2 +; ARMOPTNONE-NEXT: vmov r1, r2, d16 +; ARMOPTNONE-NEXT: str r2, [sp, #4] @ 4-byte Spill +; ARMOPTNONE-NEXT: str r1, [sp, #8] @ 4-byte Spill +; ARMOPTNONE-NEXT: dmb ish +; ARMOPTNONE-NEXT: ldr r1, [r0] +; ARMOPTNONE-NEXT: ldr r0, [r0, #4] +; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill +; ARMOPTNONE-NEXT: str r0, [sp, #16] @ 4-byte Spill +; ARMOPTNONE-NEXT: b LBB13_1 +; ARMOPTNONE-NEXT: LBB13_1: @ %atomicrmw.start +; ARMOPTNONE-NEXT: @ =>This Loop Header: Depth=1 +; ARMOPTNONE-NEXT: @ Child Loop BB13_2 Depth 2 +; ARMOPTNONE-NEXT: ldr r1, [sp, #16] @ 4-byte 
Reload +; ARMOPTNONE-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; ARMOPTNONE-NEXT: ldr r3, [sp] @ 4-byte Reload +; ARMOPTNONE-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; ARMOPTNONE-NEXT: ldr r10, [sp, #8] @ 4-byte Reload +; ARMOPTNONE-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; ARMOPTNONE-NEXT: mov r11, r0 +; ARMOPTNONE-NEXT: mov r8, r2 +; ARMOPTNONE-NEXT: mov r9, r1 +; ARMOPTNONE-NEXT: LBB13_2: @ %atomicrmw.start +; ARMOPTNONE-NEXT: @ Parent Loop BB13_1 Depth=1 +; ARMOPTNONE-NEXT: @ => This Inner Loop Header: Depth=2 +; ARMOPTNONE-NEXT: ldrexd r4, r5, [r3] +; ARMOPTNONE-NEXT: cmp r4, r8 +; ARMOPTNONE-NEXT: cmpeq r5, r9 +; ARMOPTNONE-NEXT: bne LBB13_4 +; ARMOPTNONE-NEXT: @ %bb.3: @ %atomicrmw.start +; ARMOPTNONE-NEXT: @ in Loop: Header=BB13_2 Depth=2 +; ARMOPTNONE-NEXT: strexd r0, r10, r11, [r3] +; ARMOPTNONE-NEXT: cmp r0, #0 +; ARMOPTNONE-NEXT: bne LBB13_2 +; ARMOPTNONE-NEXT: LBB13_4: @ %atomicrmw.start +; ARMOPTNONE-NEXT: @ in Loop: Header=BB13_1 Depth=1 +; ARMOPTNONE-NEXT: mov r0, r5 +; ARMOPTNONE-NEXT: eor r3, r0, r1 +; ARMOPTNONE-NEXT: mov r1, r4 +; ARMOPTNONE-NEXT: eor r2, r1, r2 +; ARMOPTNONE-NEXT: orr r2, r2, r3 +; ARMOPTNONE-NEXT: cmp r2, #0 +; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill +; ARMOPTNONE-NEXT: str r0, [sp, #16] @ 4-byte Spill +; ARMOPTNONE-NEXT: bne LBB13_1 +; ARMOPTNONE-NEXT: b LBB13_5 +; ARMOPTNONE-NEXT: LBB13_5: @ %atomicrmw.end +; ARMOPTNONE-NEXT: dmb ish +; ARMOPTNONE-NEXT: sub sp, r7, #20 +; ARMOPTNONE-NEXT: pop {r8, r10, r11} +; ARMOPTNONE-NEXT: pop {r4, r5, r7, pc} +; +; THUMBTWO-LABEL: store_atomic_f64__seq_cst: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT: dmb ish +; THUMBTWO-NEXT: LBB13_1: @ %atomicrmw.start +; THUMBTWO-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMBTWO-NEXT: ldrexd r3, r9, [r0] +; THUMBTWO-NEXT: strexd r3, r1, r2, [r0] +; THUMBTWO-NEXT: cmp r3, #0 +; THUMBTWO-NEXT: bne LBB13_1 +; THUMBTWO-NEXT: @ %bb.2: @ %atomicrmw.end +; THUMBTWO-NEXT: dmb ish +; THUMBTWO-NEXT: bx lr +; +; THUMBONE-LABEL: 
store_atomic_f64__seq_cst: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT: push {r7, lr} +; THUMBONE-NEXT: bl __sync_lock_test_and_set_8 +; THUMBONE-NEXT: pop {r7, pc} +; +; ARMV4-LABEL: store_atomic_f64__seq_cst: +; ARMV4: @ %bb.0: +; ARMV4-NEXT: push {r11, lr} +; ARMV4-NEXT: sub sp, sp, #8 +; ARMV4-NEXT: mov r1, #5 +; ARMV4-NEXT: str r1, [sp] +; ARMV4-NEXT: bl __atomic_store_8 +; ARMV4-NEXT: add sp, sp, #8 +; ARMV4-NEXT: pop {r11, lr} +; ARMV4-NEXT: mov pc, lr +; +; ARMV6-LABEL: store_atomic_f64__seq_cst: +; ARMV6: @ %bb.0: +; ARMV6-NEXT: push {r4, r5, r11, lr} +; ARMV6-NEXT: @ kill: def $r3 killed $r3 killed $r2_r3 def $r2_r3 +; ARMV6-NEXT: mov r1, #0 +; ARMV6-NEXT: @ kill: def $r2 killed $r2 killed $r2_r3 def $r2_r3 +; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT: .LBB13_1: @ %atomicrmw.start +; ARMV6-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARMV6-NEXT: ldrexd r4, r5, [r0] +; ARMV6-NEXT: strexd r1, r2, r3, [r0] +; ARMV6-NEXT: cmp r1, #0 +; ARMV6-NEXT: bne .LBB13_1 +; ARMV6-NEXT: @ %bb.2: @ %atomicrmw.end +; ARMV6-NEXT: mov r0, #0 +; ARMV6-NEXT: mcr p15, #0, r0, c7, c10, #5 +; ARMV6-NEXT: pop {r4, r5, r11, pc} +; +; THUMBM-LABEL: store_atomic_f64__seq_cst: +; THUMBM: @ %bb.0: +; THUMBM-NEXT: push {r7, lr} +; THUMBM-NEXT: sub sp, #8 +; THUMBM-NEXT: movs r1, #5 +; THUMBM-NEXT: str r1, [sp] +; THUMBM-NEXT: bl __atomic_store_8 +; THUMBM-NEXT: add sp, #8 +; THUMBM-NEXT: pop {r7, pc} + store atomic double %val1, ptr %ptr seq_cst, align 8 + ret void +} diff --git a/llvm/test/CodeGen/AVR/shift.ll b/llvm/test/CodeGen/AVR/shift.ll index c0abc77c9b14a..55ea509a8a5b6 100644 --- a/llvm/test/CodeGen/AVR/shift.ll +++ b/llvm/test/CodeGen/AVR/shift.ll @@ -60,13 +60,13 @@ define i64 @shift_i64_i64(i64 %a, i64 %b) { ; CHECK-NEXT: breq .LBB3_3 ; CHECK-NEXT: ; %bb.1: ; %shift.loop.preheader ; CHECK-NEXT: mov r27, r1 -; CHECK-NEXT: mov r16, r1 -; CHECK-NEXT: mov r17, r1 +; CHECK-NEXT: mov r16, r27 +; CHECK-NEXT: mov r17, r27 ; CHECK-NEXT: .LBB3_2: ; %shift.loop ; CHECK-NEXT: ; 
=>This Inner Loop Header: Depth=1 ; CHECK-NEXT: mov r31, r21 ; CHECK-NEXT: lsl r31 -; CHECK-NEXT: mov r26, r1 +; CHECK-NEXT: mov r26, r27 ; CHECK-NEXT: rol r26 ; CHECK-NEXT: lsl r22 ; CHECK-NEXT: rol r23 diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll index 04cdbe9d7e785..ff5bec53acd25 100644 --- a/llvm/test/CodeGen/PowerPC/atomics.ll +++ b/llvm/test/CodeGen/PowerPC/atomics.ll @@ -462,3 +462,212 @@ define i64 @and_i64_release(ptr %mem, i64 %operand) { %val = atomicrmw and ptr %mem, i64 %operand release ret i64 %val } + +define half @load_atomic_f16__seq_cst(ptr %ptr) { +; PPC32-LABEL: load_atomic_f16__seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: sync +; PPC32-NEXT: lhz r3, 0(r3) +; PPC32-NEXT: cmpw cr7, r3, r3 +; PPC32-NEXT: bne- cr7, .+4 +; PPC32-NEXT: isync +; PPC32-NEXT: bl __gnu_h2f_ieee +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_atomic_f16__seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: mflr r0 +; PPC64-NEXT: stdu r1, -112(r1) +; PPC64-NEXT: std r0, 128(r1) +; PPC64-NEXT: .cfi_def_cfa_offset 112 +; PPC64-NEXT: .cfi_offset lr, 16 +; PPC64-NEXT: sync +; PPC64-NEXT: lhz r3, 0(r3) +; PPC64-NEXT: cmpd cr7, r3, r3 +; PPC64-NEXT: bne- cr7, .+4 +; PPC64-NEXT: isync +; PPC64-NEXT: bl __gnu_h2f_ieee +; PPC64-NEXT: nop +; PPC64-NEXT: addi r1, r1, 112 +; PPC64-NEXT: ld r0, 16(r1) +; PPC64-NEXT: mtlr r0 +; PPC64-NEXT: blr + %val = load atomic half, ptr %ptr seq_cst, align 2 + ret half %val +} + +; FIXME: bf16_to_fp fails to select +; define bfloat @load_atomic_bf16__seq_cst(ptr %ptr) { +; %val = load atomic bfloat, ptr %ptr seq_cst, align 2 +; ret bfloat %val +; } + +define float @load_atomic_f32__seq_cst(ptr %ptr) { +; PPC32-LABEL: load_atomic_f32__seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: stwu r1, 
-16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: sync +; PPC32-NEXT: lwz r3, 0(r3) +; PPC32-NEXT: cmpw cr7, r3, r3 +; PPC32-NEXT: bne- cr7, .+4 +; PPC32-NEXT: isync +; PPC32-NEXT: stw r3, 12(r1) +; PPC32-NEXT: lfs f1, 12(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_atomic_f32__seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: sync +; PPC64-NEXT: lwz r3, 0(r3) +; PPC64-NEXT: cmpd cr7, r3, r3 +; PPC64-NEXT: bne- cr7, .+4 +; PPC64-NEXT: isync +; PPC64-NEXT: stw r3, -4(r1) +; PPC64-NEXT: lfs f1, -4(r1) +; PPC64-NEXT: blr + %val = load atomic float, ptr %ptr seq_cst, align 4 + ret float %val +} + +define double @load_atomic_f64__seq_cst(ptr %ptr) { +; PPC32-LABEL: load_atomic_f64__seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: li r4, 5 +; PPC32-NEXT: bl __atomic_load_8 +; PPC32-NEXT: stw r4, 12(r1) +; PPC32-NEXT: stw r3, 8(r1) +; PPC32-NEXT: lfd f1, 8(r1) +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_atomic_f64__seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: sync +; PPC64-NEXT: ld r3, 0(r3) +; PPC64-NEXT: cmpd cr7, r3, r3 +; PPC64-NEXT: bne- cr7, .+4 +; PPC64-NEXT: isync +; PPC64-NEXT: std r3, -8(r1) +; PPC64-NEXT: lfd f1, -8(r1) +; PPC64-NEXT: blr + %val = load atomic double, ptr %ptr seq_cst, align 8 + ret double %val +} + +define void @store_atomic_f16__seq_cst(ptr %ptr, half %val1) { +; PPC32-LABEL: store_atomic_f16__seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: .cfi_offset r30, -8 +; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill +; PPC32-NEXT: mr r30, r3 +; PPC32-NEXT: bl __gnu_f2h_ieee +; PPC32-NEXT: sync +; PPC32-NEXT: sth r3, 0(r30) +; PPC32-NEXT: 
lwz r30, 8(r1) # 4-byte Folded Reload +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: store_atomic_f16__seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: mflr r0 +; PPC64-NEXT: stdu r1, -128(r1) +; PPC64-NEXT: std r0, 144(r1) +; PPC64-NEXT: .cfi_def_cfa_offset 128 +; PPC64-NEXT: .cfi_offset lr, 16 +; PPC64-NEXT: .cfi_offset r30, -16 +; PPC64-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; PPC64-NEXT: mr r30, r3 +; PPC64-NEXT: bl __gnu_f2h_ieee +; PPC64-NEXT: nop +; PPC64-NEXT: sync +; PPC64-NEXT: sth r3, 0(r30) +; PPC64-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; PPC64-NEXT: addi r1, r1, 128 +; PPC64-NEXT: ld r0, 16(r1) +; PPC64-NEXT: mtlr r0 +; PPC64-NEXT: blr + store atomic half %val1, ptr %ptr seq_cst, align 2 + ret void +} + +; FIXME: bf16_to_fp fails to select +; define void @store_atomic_bf16__seq_cst(ptr %ptr, bfloat %val1) { +; store atomic bfloat %val1, ptr %ptr seq_cst, align 2 +; ret void +; } + +define void @store_atomic_f32__seq_cst(ptr %ptr, float %val1) { +; PPC32-LABEL: store_atomic_f32__seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: stfs f1, 12(r1) +; PPC32-NEXT: lwz r4, 12(r1) +; PPC32-NEXT: sync +; PPC32-NEXT: stw r4, 0(r3) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: blr +; +; PPC64-LABEL: store_atomic_f32__seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: stfs f1, -4(r1) +; PPC64-NEXT: lwz r4, -4(r1) +; PPC64-NEXT: sync +; PPC64-NEXT: stw r4, 0(r3) +; PPC64-NEXT: blr + store atomic float %val1, ptr %ptr seq_cst, align 4 + ret void +} + +define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { +; PPC32-LABEL: store_atomic_f64__seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: stw r0, 20(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: stfd f1, 8(r1) +; PPC32-NEXT: li r7, 5 +; PPC32-NEXT: lwz r5, 8(r1) +; PPC32-NEXT: lwz 
r6, 12(r1) +; PPC32-NEXT: bl __atomic_store_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: store_atomic_f64__seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: stfd f1, -8(r1) +; PPC64-NEXT: ld r4, -8(r1) +; PPC64-NEXT: sync +; PPC64-NEXT: std r4, 0(r3) +; PPC64-NEXT: blr + store atomic double %val1, ptr %ptr seq_cst, align 8 + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/peephole-counter-XToI.mir b/llvm/test/CodeGen/PowerPC/peephole-counter-XToI.mir index d8f2b08adaf2f..dc20a1577aa5b 100644 --- a/llvm/test/CodeGen/PowerPC/peephole-counter-XToI.mir +++ b/llvm/test/CodeGen/PowerPC/peephole-counter-XToI.mir @@ -3,16 +3,16 @@ # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ # RUN: -run-pass ppc-mi-peepholes %s -o - | FileCheck %s --check-prefix=ALL # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ -# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-xtoi-peephole-skip=0,ppc-xtoi-peephole-count=8 \ +# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-xtoi-peephole=0-7 \ # RUN: | FileCheck %s --check-prefix=ALL # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ -# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-xtoi-peephole-skip=3,ppc-xtoi-peephole-count=2 \ +# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-xtoi-peephole=3-4 \ # RUN: | FileCheck %s --check-prefix=ONE-FIRSTSTORE # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ -# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-xtoi-peephole-skip=5,ppc-xtoi-peephole-count=2 \ +# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-xtoi-peephole=5-6 \ # RUN: | FileCheck %s --check-prefix=ONE-SECONDSTORE # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ -# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-xtoi-peephole-skip=3,ppc-xtoi-peephole-count=4 \ +# RUN: 
-run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-xtoi-peephole=3-6 \ # RUN: | FileCheck %s --check-prefix=TWO --- diff --git a/llvm/test/CodeGen/PowerPC/peephole-counter-perOp.mir b/llvm/test/CodeGen/PowerPC/peephole-counter-perOp.mir index cf3ff291e26c6..09f7ededa20c6 100644 --- a/llvm/test/CodeGen/PowerPC/peephole-counter-perOp.mir +++ b/llvm/test/CodeGen/PowerPC/peephole-counter-perOp.mir @@ -3,16 +3,19 @@ # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ # RUN: -run-pass ppc-mi-peepholes %s -o - | FileCheck %s --check-prefix=ALL # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ -# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-per-op-peephole-skip=0,ppc-per-op-peephole-count=6 \ +# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-per-op-peephole=0-5 \ # RUN: | FileCheck %s --check-prefix=ALL # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ -# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-per-op-peephole-skip=3,ppc-per-op-peephole-count=1 \ +# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-per-op-peephole=0-5 \ +# RUN: | FileCheck %s --check-prefix=ALL +# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ +# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-per-op-peephole=3 \ # RUN: | FileCheck %s --check-prefix=ONE-FIRST-RLWINM # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ -# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-per-op-peephole-skip=4,ppc-per-op-peephole-count=1 \ +# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-per-op-peephole=4 \ # RUN: | FileCheck %s --check-prefix=ONE-SECOND-RLWINM # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ -# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-per-op-peephole-skip=3,ppc-per-op-peephole-count=2 \ +# RUN: -run-pass ppc-mi-peepholes %s -o - 
-debug-counter=ppc-per-op-peephole=3-4 \ # RUN: | FileCheck %s --check-prefix=TWO --- diff --git a/llvm/test/CodeGen/PowerPC/pr62372.ll b/llvm/test/CodeGen/PowerPC/pr62372.ll new file mode 100644 index 0000000000000..8df236adc92d7 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr62372.ll @@ -0,0 +1,13 @@ +; RUN: llc -ppc-asm-full-reg-names -mcpu=pwr10 -mtriple powerpc64le-unknown-linux-gnu \ +; RUN: -o - %s | FileCheck %s + +@bar = dso_local global i32 0, align 4 + +define dso_local ptr @foo() #0 { +entry: + ret ptr @bar +} + +attributes #0 = { "use-soft-float"="true" } + +; CHECK: paddi r3, 0, bar@PCREL, 1 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll new file mode 100644 index 0000000000000..5c42fefb95b39 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll @@ -0,0 +1,207 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -global-isel -global-isel-abort=1 < %s 2>&1 | FileCheck %s --check-prefixes=RV32 +; RUN: llc -mtriple=riscv64 -global-isel -global-isel-abort=1 < %s 2>&1 | FileCheck %s --check-prefixes=RV64 + +define i2 @bitreverse_i2(i2 %x) { +; RV32-LABEL: bitreverse_i2: +; RV32: # %bb.0: +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: andi a1, a1, 2 +; RV32-NEXT: andi a0, a0, 3 +; RV32-NEXT: srli a0, a0, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: bitreverse_i2: +; RV64: # %bb.0: +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: andi a1, a1, 2 +; RV64-NEXT: andi a0, a0, 3 +; RV64-NEXT: srliw a0, a0, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %rev = call i2 @llvm.bitreverse.i2(i2 %x) + ret i2 %rev +} + +define i3 @bitreverse_i3(i3 %x) { +; RV32-LABEL: bitreverse_i3: +; RV32: # %bb.0: +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: andi a1, a1, 4 +; RV32-NEXT: andi a0, a0, 7 +; RV32-NEXT: andi a2, a0, 2 +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: or a0, a1, a0 +; 
RV32-NEXT: ret +; +; RV64-LABEL: bitreverse_i3: +; RV64: # %bb.0: +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: andi a1, a1, 4 +; RV64-NEXT: andi a0, a0, 7 +; RV64-NEXT: andi a2, a0, 2 +; RV64-NEXT: or a1, a1, a2 +; RV64-NEXT: srliw a0, a0, 2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %rev = call i3 @llvm.bitreverse.i3(i3 %x) + ret i3 %rev +} + +define i4 @bitreverse_i4(i4 %x) { +; RV32-LABEL: bitreverse_i4: +; RV32: # %bb.0: +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: andi a1, a1, 8 +; RV32-NEXT: slli a2, a0, 1 +; RV32-NEXT: andi a2, a2, 4 +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: andi a0, a0, 15 +; RV32-NEXT: srli a2, a0, 1 +; RV32-NEXT: andi a2, a2, 2 +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: srli a0, a0, 3 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: bitreverse_i4: +; RV64: # %bb.0: +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: andi a1, a1, 8 +; RV64-NEXT: slli a2, a0, 1 +; RV64-NEXT: andi a2, a2, 4 +; RV64-NEXT: or a1, a1, a2 +; RV64-NEXT: andi a0, a0, 15 +; RV64-NEXT: srliw a2, a0, 1 +; RV64-NEXT: andi a2, a2, 2 +; RV64-NEXT: or a1, a1, a2 +; RV64-NEXT: srliw a0, a0, 3 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %rev = call i4 @llvm.bitreverse.i4(i4 %x) + ret i4 %rev +} + +define i7 @bitreverse_i7(i7 %x) { +; RV32-LABEL: bitreverse_i7: +; RV32: # %bb.0: +; RV32-NEXT: slli a1, a0, 6 +; RV32-NEXT: andi a1, a1, 64 +; RV32-NEXT: slli a2, a0, 4 +; RV32-NEXT: andi a2, a2, 32 +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: slli a2, a0, 2 +; RV32-NEXT: andi a2, a2, 16 +; RV32-NEXT: andi a0, a0, 127 +; RV32-NEXT: andi a3, a0, 8 +; RV32-NEXT: or a2, a2, a3 +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: srli a2, a0, 2 +; RV32-NEXT: andi a2, a2, 4 +; RV32-NEXT: srli a3, a0, 4 +; RV32-NEXT: andi a3, a3, 2 +; RV32-NEXT: or a2, a2, a3 +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: srli a0, a0, 6 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: bitreverse_i7: +; RV64: # %bb.0: +; RV64-NEXT: slli a1, a0, 6 +; RV64-NEXT: andi a1, a1, 64 +; RV64-NEXT: slli 
a2, a0, 4 +; RV64-NEXT: andi a2, a2, 32 +; RV64-NEXT: or a1, a1, a2 +; RV64-NEXT: slli a2, a0, 2 +; RV64-NEXT: andi a2, a2, 16 +; RV64-NEXT: andi a0, a0, 127 +; RV64-NEXT: andi a3, a0, 8 +; RV64-NEXT: or a2, a2, a3 +; RV64-NEXT: or a1, a1, a2 +; RV64-NEXT: srliw a2, a0, 2 +; RV64-NEXT: andi a2, a2, 4 +; RV64-NEXT: srliw a3, a0, 4 +; RV64-NEXT: andi a3, a3, 2 +; RV64-NEXT: or a2, a2, a3 +; RV64-NEXT: or a1, a1, a2 +; RV64-NEXT: srliw a0, a0, 6 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %rev = call i7 @llvm.bitreverse.i7(i7 %x) + ret i7 %rev +} + +define i24 @bitreverse_i24(i24 %x) { +; RV32-LABEL: bitreverse_i24: +; RV32: # %bb.0: +; RV32-NEXT: slli a1, a0, 16 +; RV32-NEXT: lui a2, 4096 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: srli a0, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: lui a1, 1048335 +; RV32-NEXT: addi a1, a1, 240 +; RV32-NEXT: and a3, a1, a2 +; RV32-NEXT: and a3, a0, a3 +; RV32-NEXT: srli a3, a3, 4 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: or a0, a3, a0 +; RV32-NEXT: lui a1, 1047757 +; RV32-NEXT: addi a1, a1, -820 +; RV32-NEXT: and a3, a1, a2 +; RV32-NEXT: and a3, a0, a3 +; RV32-NEXT: srli a3, a3, 2 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: or a0, a3, a0 +; RV32-NEXT: lui a1, 1047211 +; RV32-NEXT: addi a1, a1, -1366 +; RV32-NEXT: and a2, a1, a2 +; RV32-NEXT: and a2, a0, a2 +; RV32-NEXT: srli a2, a2, 1 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: bitreverse_i24: +; RV64: # %bb.0: +; RV64-NEXT: slli a1, a0, 16 +; RV64-NEXT: lui a2, 4096 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: srliw a0, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: lui a1, 1048335 +; RV64-NEXT: addi a1, a1, 240 +; RV64-NEXT: and a3, a1, a2 +; RV64-NEXT: and a3, a0, a3 +; RV64-NEXT: srliw a3, a3, 4 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: or a0, a3, 
a0 +; RV64-NEXT: lui a1, 1047757 +; RV64-NEXT: addi a1, a1, -820 +; RV64-NEXT: and a3, a1, a2 +; RV64-NEXT: and a3, a0, a3 +; RV64-NEXT: srliw a3, a3, 2 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: or a0, a3, a0 +; RV64-NEXT: lui a1, 1047211 +; RV64-NEXT: addiw a1, a1, -1366 +; RV64-NEXT: and a2, a1, a2 +; RV64-NEXT: and a2, a0, a2 +; RV64-NEXT: srliw a2, a2, 1 +; RV64-NEXT: slliw a0, a0, 1 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: or a0, a2, a0 +; RV64-NEXT: ret + %rev = call i24 @llvm.bitreverse.i24(i24 %x) + ret i24 %rev +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-abs-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-abs-rv32.mir index 7d05edd3f3413..f96d659782178 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-abs-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-abs-rv32.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=riscv32 -run-pass=legalizer %s -o - \ -# RUN: | FileCheck %s --check-prefix=RV32I +# RUN: | FileCheck %s --check-prefixes=CHECK,RV32I # RUN: llc -mtriple=riscv32 -mattr=+zbb -run-pass=legalizer %s -o -\ -# RUN: | FileCheck %s --check-prefix=RV32ZBB +# RUN: | FileCheck %s --check-prefixes=CHECK,RV32ZBB --- name: abs_i8 @@ -124,10 +124,12 @@ body: | ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C1]](s32) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[ASHR]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]] ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD2]], [[ASHR1]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ADD2]](s32) + ; CHECK-NEXT: 
[[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[ASHR]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY3]], [[ASHR1]] ; CHECK-NEXT: $x10 = COPY [[XOR]](s32) ; CHECK-NEXT: $x11 = COPY [[XOR1]](s32) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-bitreverse-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-bitreverse-rv32.mir index 5044514babe54..7625a5c2d568a 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-bitreverse-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-bitreverse-rv32.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 -# RUN: llc -mtriple=riscv32 -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=riscv32 -mattr=+v -global-isel-abort=0 -run-pass=legalizer %s -o - | FileCheck %s --- name: bitreverse_i8 @@ -248,3 +248,277 @@ body: | PseudoRET implicit $x10, implicit $x11 ... 
+--- +name: bitreverse_i2 +body: | + bb.1.entry: + liveins: $x10 + + ; CHECK-LABEL: name: bitreverse_i2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C2]](s32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND2]] + ; CHECK-NEXT: $x10 = COPY [[OR]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %1:_(s32) = COPY $x10 + %0:_(s2) = G_TRUNC %1(s32) + %2:_(s2) = G_BITREVERSE %0 + %3:_(s32) = G_ANYEXT %2(s2) + $x10 = COPY %3(s32) + PseudoRET implicit $x10 + +... 
+--- +name: bitreverse_i3 +body: | + bb.1.entry: + liveins: $x10 + + ; CHECK-LABEL: name: bitreverse_i3 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C2]](s32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND2]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C6]] + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C5]](s32) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C7]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[AND4]] + ; CHECK-NEXT: $x10 = COPY [[OR1]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %1:_(s32) = COPY $x10 + %0:_(s3) = G_TRUNC %1(s32) + %2:_(s3) = G_BITREVERSE %0 + %3:_(s32) = G_ANYEXT %2(s3) + $x10 = COPY %3(s32) + PseudoRET implicit $x10 + +... 
+--- +name: bitreverse_i4 +body: | + bb.1.entry: + liveins: $x10 + + ; CHECK-LABEL: name: bitreverse_i4 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C3]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C5]] + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C6]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[AND3]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C8]] + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[C7]](s32) + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C9]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[AND5]] + ; CHECK-NEXT: $x10 = COPY [[OR2]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %1:_(s32) = COPY $x10 + %0:_(s4) = G_TRUNC %1(s32) + %2:_(s4) = G_BITREVERSE %0 + %3:_(s32) = G_ANYEXT %2(s4) + $x10 = COPY %3(s32) + PseudoRET implicit $x10 + +... 
+--- +name: bitreverse_i7 +body: | + bb.1.entry: + liveins: $x10 + + ; CHECK-LABEL: name: bitreverse_i7 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C3]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C4]](s32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[AND2]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C7]] + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C6]](s32) + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C8]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[AND4]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C10]] + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[C9]](s32) + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C11]] + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[AND6]] + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 4 + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C13]] + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[C12]](s32) + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C14]] + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[AND8]] + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C16]] + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[C15]](s32) + ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C17]] + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[AND10]] + ; CHECK-NEXT: $x10 = COPY [[OR5]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %1:_(s32) = COPY $x10 + %0:_(s7) = G_TRUNC %1(s32) + %2:_(s7) = G_BITREVERSE %0 + %3:_(s32) = G_ANYEXT %2(s7) + $x10 = COPY %3(s32) + PseudoRET implicit $x10 + +... 
+--- +name: bitreverse_i24 +body: | + bb.1.entry: + liveins: $x10 + + ; CHECK-LABEL: name: bitreverse_i24 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -986896 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C4]] + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[OR]], [[C2]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C3]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[AND3]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 -3355444 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[AND4]], [[C7]] + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[C5]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C5]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C6]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[AND6]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 -5592406 + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C9]] + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 16777215 + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[AND7]], [[C10]] + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[C8]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[OR2]], [[C8]](s32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C9]] + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[AND9]] + ; CHECK-NEXT: $x10 = COPY [[OR3]](s32) + ; CHECK-NEXT: PseudoRET implicit $x10 + %1:_(s32) = COPY $x10 + %0:_(s24) = G_TRUNC %1(s32) + %2:_(s24) = G_BITREVERSE %0 + %3:_(s32) = G_ANYEXT %2(s24) + $x10 = COPY %3(s32) + PseudoRET implicit $x10 + +... +--- +name: bitreverse_v2i4 +body: | + bb.1.entry: + + ; CHECK-LABEL: name: bitreverse_v2i4 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $v8 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s4>) = G_TRUNC [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s4) = G_CONSTANT i4 3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s4>) = G_BUILD_VECTOR [[C]](s4), [[C]](s4) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<2 x s4>) = G_SHL [[TRUNC]], [[BUILD_VECTOR]](<2 x s4>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s4) = G_CONSTANT i4 -8 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s4>) = G_BUILD_VECTOR [[C1]](s4), [[C1]](s4) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s4>) = G_AND [[SHL]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s4) = G_CONSTANT i4 1 + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s4>) = G_BUILD_VECTOR [[C2]](s4), [[C2]](s4) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s4>) = G_SHL [[TRUNC]], [[BUILD_VECTOR2]](<2 x s4>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s4) = G_CONSTANT i4 4 + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s4>) = G_BUILD_VECTOR [[C3]](s4), [[C3]](s4) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s4>) = G_AND [[SHL1]], [[BUILD_VECTOR3]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s4>) = G_OR [[AND]], [[AND1]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s4) = G_CONSTANT i4 1 + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s4>) = G_BUILD_VECTOR [[C4]](s4), [[C4]](s4) + ; CHECK-NEXT: 
[[LSHR:%[0-9]+]]:_(<2 x s4>) = G_LSHR [[TRUNC]], [[BUILD_VECTOR4]](<2 x s4>) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s4) = G_CONSTANT i4 2 + ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s4>) = G_BUILD_VECTOR [[C5]](s4), [[C5]](s4) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<2 x s4>) = G_AND [[LSHR]], [[BUILD_VECTOR5]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s4>) = G_OR [[OR]], [[AND2]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s4) = G_CONSTANT i4 3 + ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s4>) = G_BUILD_VECTOR [[C6]](s4), [[C6]](s4) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s4>) = G_LSHR [[TRUNC]], [[BUILD_VECTOR6]](<2 x s4>) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s4) = G_CONSTANT i4 1 + ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s4>) = G_BUILD_VECTOR [[C7]](s4), [[C7]](s4) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<2 x s4>) = G_AND [[LSHR1]], [[BUILD_VECTOR7]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(<2 x s4>) = G_OR [[OR1]], [[AND3]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s4>) = COPY [[OR2]](<2 x s4>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<2 x s32>) = G_ANYEXT [[COPY1]](<2 x s4>) + ; CHECK-NEXT: $v8 = COPY [[ANYEXT]](<2 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<2 x s32>) = COPY $v8 + %0:_(<2 x s4>) = G_TRUNC %1(<2 x s32>) + %2:_(<2 x s4>) = G_BITREVERSE %0 + %3:_(<2 x s32>) = G_ANYEXT %2(<2 x s4>) + $v8 = COPY %3(<2 x s32>) + PseudoRET implicit $v8 + +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-bitreverse-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-bitreverse-rv64.mir index d147350465166..71583f15cd5cd 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-bitreverse-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-bitreverse-rv64.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 -# RUN: llc -mtriple=riscv64 -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=riscv64 -mattr=+v -global-isel-abort=0 -run-pass=legalizer %s -o - | FileCheck %s --- name: bitreverse_i8 @@ -251,3 +251,304 @@ body: | PseudoRET implicit $x10 ... +--- +name: bitreverse_i2 +body: | + bb.1.entry: + liveins: $x10 + + ; CHECK-LABEL: name: bitreverse_i2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C3]](s64) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND2]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) + ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %1:_(s64) = COPY $x10 + %0:_(s2) = G_TRUNC %1(s64) + %2:_(s2) =
G_BITREVERSE %0 + %3:_(s64) = G_ANYEXT %2(s2) + $x10 = COPY %3(s64) + PseudoRET implicit $x10 + +... +--- +name: bitreverse_i3 +body: | + bb.1.entry: + liveins: $x10 + + ; CHECK-LABEL: name: bitreverse_i3 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C3]](s64) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C5]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C6]](s64) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C7]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[AND4]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) + ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %1:_(s64) = COPY $x10 + %0:_(s3) = G_TRUNC %1(s64) + %2:_(s3) = G_BITREVERSE %0 + %3:_(s64) = G_ANYEXT %2(s3) + $x10 = COPY %3(s64) + PseudoRET implicit $x10 + +... 
+--- +name: bitreverse_i4 +body: | + bb.1.entry: + liveins: $x10 + + ; CHECK-LABEL: name: bitreverse_i4 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[TRUNC1]], [[C2]](s64) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C3]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C4]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C5]](s64) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C6]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[AND3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C7]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[C8]](s64) + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C9]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[AND5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR2]](s32) + ; CHECK-NEXT: $x10 = COPY 
[[ANYEXT]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %1:_(s64) = COPY $x10 + %0:_(s4) = G_TRUNC %1(s64) + %2:_(s4) = G_BITREVERSE %0 + %3:_(s64) = G_ANYEXT %2(s4) + $x10 = COPY %3(s64) + PseudoRET implicit $x10 + +... +--- +name: bitreverse_i7 +body: | + bb.1.entry: + liveins: $x10 + + ; CHECK-LABEL: name: bitreverse_i7 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[TRUNC1]], [[C2]](s64) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C3]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[TRUNC2]], [[C4]](s64) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[AND2]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C6]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C7]](s64) + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C8]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], 
[[AND4]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[TRUNC4]], [[C9]] + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[C10]](s64) + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C11]] + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[AND6]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[TRUNC5]], [[C12]] + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[C13]](s64) + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C14]] + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[AND8]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[TRUNC6]], [[C15]] + ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[C16]](s64) + ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C17]] + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[AND10]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %1:_(s64) = COPY $x10 + %0:_(s7) = G_TRUNC %1(s64) + %2:_(s7) = G_BITREVERSE %0 + %3:_(s64) = G_ANYEXT %2(s7) + $x10 = COPY %3(s64) + PseudoRET implicit $x10 + +... 
+--- +name: bitreverse_i24 +body: | + bb.1.entry: + liveins: $x10 + + ; CHECK-LABEL: name: bitreverse_i24 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -986896 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C4]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C5]](s64) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[OR]], [[C6]](s64) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C3]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[AND3]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 -3355444 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C7]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[AND4]], [[C8]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[C9]](s64) + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C10]](s64) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SHL2]], 
[[C7]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[AND6]] + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 -5592406 + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C11]] + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[AND7]], [[C12]] + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[C13]](s64) + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[OR2]], [[C14]](s64) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C11]] + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[AND9]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR3]](s32) + ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; CHECK-NEXT: PseudoRET implicit $x10 + %1:_(s64) = COPY $x10 + %0:_(s24) = G_TRUNC %1(s64) + %2:_(s24) = G_BITREVERSE %0 + %3:_(s64) = G_ANYEXT %2(s24) + $x10 = COPY %3(s64) + PseudoRET implicit $x10 + +... 
+--- +name: bitreverse_v2i4 +body: | + bb.1.entry: + + ; CHECK-LABEL: name: bitreverse_v2i4 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $v8 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s4>) = G_TRUNC [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s4) = G_CONSTANT i4 3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s4>) = G_BUILD_VECTOR [[C]](s4), [[C]](s4) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<2 x s4>) = G_SHL [[TRUNC]], [[BUILD_VECTOR]](<2 x s4>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s4) = G_CONSTANT i4 -8 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s4>) = G_BUILD_VECTOR [[C1]](s4), [[C1]](s4) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s4>) = G_AND [[SHL]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s4) = G_CONSTANT i4 1 + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s4>) = G_BUILD_VECTOR [[C2]](s4), [[C2]](s4) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s4>) = G_SHL [[TRUNC]], [[BUILD_VECTOR2]](<2 x s4>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s4) = G_CONSTANT i4 4 + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s4>) = G_BUILD_VECTOR [[C3]](s4), [[C3]](s4) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s4>) = G_AND [[SHL1]], [[BUILD_VECTOR3]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s4>) = G_OR [[AND]], [[AND1]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s4) = G_CONSTANT i4 1 + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s4>) = G_BUILD_VECTOR [[C4]](s4), [[C4]](s4) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s4>) = G_LSHR [[TRUNC]], [[BUILD_VECTOR4]](<2 x s4>) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s4) = G_CONSTANT i4 2 + ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s4>) = G_BUILD_VECTOR [[C5]](s4), [[C5]](s4) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<2 x s4>) = G_AND [[LSHR]], [[BUILD_VECTOR5]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s4>) = G_OR [[OR]], [[AND2]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s4) = G_CONSTANT i4 3 + ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s4>) = G_BUILD_VECTOR [[C6]](s4), [[C6]](s4) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s4>) = G_LSHR [[TRUNC]], [[BUILD_VECTOR6]](<2 
x s4>) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s4) = G_CONSTANT i4 1 + ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s4>) = G_BUILD_VECTOR [[C7]](s4), [[C7]](s4) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<2 x s4>) = G_AND [[LSHR1]], [[BUILD_VECTOR7]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(<2 x s4>) = G_OR [[OR1]], [[AND3]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s4>) = COPY [[OR2]](<2 x s4>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<2 x s32>) = G_ANYEXT [[COPY1]](<2 x s4>) + ; CHECK-NEXT: $v8 = COPY [[ANYEXT]](<2 x s32>) + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(<2 x s32>) = COPY $v8 + %0:_(<2 x s4>) = G_TRUNC %1(<2 x s32>) + %2:_(<2 x s4>) = G_BITREVERSE %0 + %3:_(<2 x s32>) = G_ANYEXT %2(<2 x s4>) + $v8 = COPY %3(<2 x s32>) + PseudoRET implicit $v8 + +... diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll index 549d531e829ea..a90c244437a03 100644 --- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll @@ -383,8 +383,8 @@ define i64 @test_cttz_i64(i64 %a) nounwind { ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lui a0, %hi(.LCPI3_0) -; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0) +; RV32I-NEXT: lui s4, %hi(.LCPI3_0) +; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0) ; RV32I-NEXT: neg a0, s2 ; RV32I-NEXT: and a0, s2, a0 ; RV32I-NEXT: mv a1, s3 @@ -442,9 +442,9 @@ define i64 @test_cttz_i64(i64 %a) nounwind { ; RV32M-LABEL: test_cttz_i64: ; RV32M: # %bb.0: ; RV32M-NEXT: lui a2, 30667 -; RV32M-NEXT: addi a2, a2, 1329 -; RV32M-NEXT: lui a3, %hi(.LCPI3_0) -; RV32M-NEXT: addi a3, a3, %lo(.LCPI3_0) +; RV32M-NEXT: addi a3, a2, 1329 +; RV32M-NEXT: lui a2, %hi(.LCPI3_0) +; RV32M-NEXT: addi a2, a2, %lo(.LCPI3_0) ; RV32M-NEXT: bnez a1, .LBB3_3 ; RV32M-NEXT: # %bb.1: ; RV32M-NEXT: li a1, 32 @@ -452,18 +452,18 @@ define i64 @test_cttz_i64(i64 %a) nounwind { ; RV32M-NEXT: .LBB3_2: ; RV32M-NEXT: neg a1, a0 ; RV32M-NEXT: and a0, a0, a1 -; RV32M-NEXT: mul a0, a0, a2 +; 
RV32M-NEXT: mul a0, a0, a3 ; RV32M-NEXT: srli a0, a0, 27 -; RV32M-NEXT: add a0, a3, a0 +; RV32M-NEXT: add a0, a2, a0 ; RV32M-NEXT: lbu a0, 0(a0) ; RV32M-NEXT: li a1, 0 ; RV32M-NEXT: ret ; RV32M-NEXT: .LBB3_3: ; RV32M-NEXT: neg a4, a1 ; RV32M-NEXT: and a1, a1, a4 -; RV32M-NEXT: mul a1, a1, a2 +; RV32M-NEXT: mul a1, a1, a3 ; RV32M-NEXT: srli a1, a1, 27 -; RV32M-NEXT: add a1, a3, a1 +; RV32M-NEXT: add a1, a2, a1 ; RV32M-NEXT: lbu a1, 0(a1) ; RV32M-NEXT: bnez a0, .LBB3_2 ; RV32M-NEXT: .LBB3_4: @@ -814,8 +814,8 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind { ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3 ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lui a0, %hi(.LCPI7_0) -; RV32I-NEXT: addi s4, a0, %lo(.LCPI7_0) +; RV32I-NEXT: lui s4, %hi(.LCPI7_0) +; RV32I-NEXT: addi s4, s4, %lo(.LCPI7_0) ; RV32I-NEXT: neg a0, s1 ; RV32I-NEXT: and a0, s1, a0 ; RV32I-NEXT: mv a1, s3 diff --git a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll index 9ae30e646fdbf..fe6e20d852d59 100644 --- a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll +++ b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll @@ -48,8 +48,8 @@ define signext i32 @ctz_dereferencing_pointer(ptr %b) nounwind { ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __mulsi3 ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lui a0, %hi(.LCPI0_0) -; RV32I-NEXT: addi s3, a0, %lo(.LCPI0_0) +; RV32I-NEXT: lui s3, %hi(.LCPI0_0) +; RV32I-NEXT: addi s3, s3, %lo(.LCPI0_0) ; RV32I-NEXT: neg a0, s4 ; RV32I-NEXT: and a0, s4, a0 ; RV32I-NEXT: mv a1, s1 @@ -511,8 +511,8 @@ define signext i32 @ctz4(i64 %b) nounwind { ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lui a0, %hi(.LCPI6_0) -; RV32I-NEXT: addi s4, a0, %lo(.LCPI6_0) +; RV32I-NEXT: lui s4, %hi(.LCPI6_0) +; RV32I-NEXT: addi s4, s4, %lo(.LCPI6_0) ; RV32I-NEXT: neg a0, s2 ; RV32I-NEXT: and a0, s2, a0 ; RV32I-NEXT: mv a1, s3 diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll 
b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll index eb6ac985287a1..478d2eae9dca2 100644 --- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll +++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll @@ -24,31 +24,31 @@ define void @_Z3foov() { ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49) ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma -; CHECK-NEXT: vle16.v v10, (a0) +; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_48) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_48) -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v10, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_46) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_46) -; CHECK-NEXT: vle16.v v12, (a0) +; CHECK-NEXT: vle16.v v10, (a0) ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_45) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45) -; CHECK-NEXT: vle16.v v14, (a0) +; CHECK-NEXT: vle16.v v12, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs2r.v v14, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: vs2r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_40) diff --git a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll index 
3c2e84689c979..62b1549a5d58a 100644 --- a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll +++ b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll @@ -389,8 +389,8 @@ define dso_local i32 @load_ga() local_unnamed_addr #0 { define dso_local i64 @load_ga_8() nounwind { ; RV32I-LABEL: load_ga_8: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: lui a0, %hi(ga_8) -; RV32I-NEXT: addi a1, a0, %lo(ga_8) +; RV32I-NEXT: lui a1, %hi(ga_8) +; RV32I-NEXT: addi a1, a1, %lo(ga_8) ; RV32I-NEXT: lw a0, 8(a1) ; RV32I-NEXT: lw a1, 12(a1) ; RV32I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll b/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll new file mode 100644 index 0000000000000..e30bdfb939471 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O3 < %s -mtriple=riscv64 -mattr=+v | FileCheck %s + +declare i32 @llvm.vector.reduce.add.nxv2i32() + +define i32 @test(ptr %a, i64 %n) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: .LBB0_1: # %loop +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vl1re32.v v9, (a0) +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: vredsum.vs v9, v9, v8 +; CHECK-NEXT: vmv.x.s a3, v9 +; CHECK-NEXT: addw a3, a3, a3 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: addi a0, a0, 8 +; CHECK-NEXT: bnez a1, .LBB0_1 +; CHECK-NEXT: # %bb.2: # %exit +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: ret +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.inc, %loop ] + %sum = phi i32 [ 0, %entry ], [ %sum.inc, %loop ] + %idx = getelementptr inbounds ptr, ptr %a, i64 %indvar + %data = load , ptr %idx + %reduce = tail call i32 @llvm.vector.reduce.add.nxv2i32( %data) + %sum.inc = add i32 %reduce, %reduce + %indvar.inc = add i64 %indvar, 1 + %cmp = icmp eq i64 
%indvar.inc, %n + br i1 %cmp, label %exit, label %loop + +exit: + ret i32 %sum +} diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll index b45ab135fa1c7..197366e7e05fe 100644 --- a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll @@ -209,8 +209,8 @@ define i64 @cttz_i64(i64 %a) nounwind { ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lui a0, %hi(.LCPI3_0) -; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0) +; RV32I-NEXT: lui s4, %hi(.LCPI3_0) +; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0) ; RV32I-NEXT: neg a0, s2 ; RV32I-NEXT: and a0, s2, a0 ; RV32I-NEXT: mv a1, s3 diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll index 7e6c3f9c87d27..f25aa0de89da8 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -199,8 +199,8 @@ define i64 @cttz_i64(i64 %a) nounwind { ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lui a0, %hi(.LCPI3_0) -; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0) +; RV32I-NEXT: lui s4, %hi(.LCPI3_0) +; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0) ; RV32I-NEXT: neg a0, s2 ; RV32I-NEXT: and a0, s2, a0 ; RV32I-NEXT: mv a1, s3 diff --git a/llvm/test/CodeGen/RISCV/rvv/65704-illegal-instruction.ll b/llvm/test/CodeGen/RISCV/rvv/65704-illegal-instruction.ll index 42d6dac5b07fa..5ced89c17c420 100644 --- a/llvm/test/CodeGen/RISCV/rvv/65704-illegal-instruction.ll +++ b/llvm/test/CodeGen/RISCV/rvv/65704-illegal-instruction.ll @@ -15,27 +15,30 @@ define void @foo( %0) { ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 +; CHECK-NEXT: .cfi_offset s2, -32 +; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, 
m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 0, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vi v9, v10, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv.x.s s0, v9 +; CHECK-NEXT: vmv.x.s s1, v9 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v9, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv.x.s s1, v8 +; CHECK-NEXT: vmv.x.s s2, v8 ; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: mv a0, s0 -; CHECK-NEXT: mv a2, s1 -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: jalr a1 +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv a1, s0 +; CHECK-NEXT: mv a2, s2 +; CHECK-NEXT: mv a3, s0 +; CHECK-NEXT: mv a4, s0 +; CHECK-NEXT: mv a5, s0 +; CHECK-NEXT: jalr s0 ; CHECK-NEXT: j .LBB0_1 %2 = tail call @llvm.vector.insert.nxv8i8.v16i8( undef, <16 x i8> undef, i64 0) %3 = tail call @llvm.vector.insert.nxv8i8.v16i8( undef, <16 x i8> poison, i64 0) diff --git a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll index 9cb3991f31f94..08b310213d16e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll @@ -126,28 +126,28 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: vsaddu.vx v8, v8, a1 -; CHECK-NEXT: vmsltu.vx v0, v8, a2 ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_0) -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vmsltu.vx v0, v8, a2 ; CHECK-NEXT: lui a0, %hi(.LCPI9_1) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_1) -; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsext.vf8 v24, v16 +; CHECK-NEXT: vsaddu.vx v16, v24, a1 +; CHECK-NEXT: vmsltu.vx v9, v16, a2 ; CHECK-NEXT: vsext.vf8 v16, v8 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; 
CHECK-NEXT: vmsltu.vx v8, v16, a2 -; CHECK-NEXT: vsext.vf8 v16, v9 -; CHECK-NEXT: vsaddu.vx v16, v16, a1 ; CHECK-NEXT: lui a0, %hi(.LCPI9_2) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_2) -; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmsltu.vx v10, v16, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v0, v8, 2 +; CHECK-NEXT: vslideup.vi v0, v9, 2 ; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v0, v10, 4 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vsext.vf8 v16, v8 ; CHECK-NEXT: vsaddu.vx v8, v16, a1 ; CHECK-NEXT: vmsltu.vx v16, v8, a2 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma @@ -169,13 +169,13 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: vsext.vf8 v16, v8 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v10, v16, a2 +; CHECK-NEXT: vmsltu.vx v8, v16, a2 ; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v8, v16, a2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_2) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_2) ; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vmsltu.vx v10, v16, a2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_3) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_3) ; CHECK-NEXT: vle8.v v11, (a0) @@ -187,10 +187,10 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vmsltu.vx v11, v16, a2 ; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_4) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_4) ; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_5) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_5) ; CHECK-NEXT: vle8.v v13, (a0) @@ -201,27 +201,27 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vsaddu.vx v16, v16, a1 ; CHECK-NEXT: vmsltu.vx v13, v16, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, 
mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: vslideup.vi v10, v8, 2 ; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v9, 4 +; CHECK-NEXT: vslideup.vi v10, v9, 4 ; CHECK-NEXT: lui a0, %hi(.LCPI10_6) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_6) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v11, 6 +; CHECK-NEXT: vslideup.vi v10, v11, 6 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v0, v12, 2 ; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v0, v13, 4 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vsext.vf8 v16, v8 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vmsltu.vx v8, v16, a2 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v0, v9, 6 +; CHECK-NEXT: vslideup.vi v0, v8, 6 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vi v0, v8, 8 +; CHECK-NEXT: vslideup.vi v0, v10, 8 ; CHECK-NEXT: ret %mask = call <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64 %index, i64 %tc) ret <128 x i1> %mask diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll index fff280c005b54..df413b878172b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll @@ -2574,9 +2574,8 @@ define @vp_ctlz_nxv1i9( %va, @vp_ctlz_nxv1i9( %va, @vp_ctlz_zero_undef_nxv1i9( %va, @vp_ctlz_zero_undef_nxv1i9( %va, @vp_ctpop_nxv1i9( %va, @vp_ctpop_nxv1i9( %va, @llvm.vp.ctpop.nxv1i9( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll index 75747a6674b7b..d8781495abd75 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll @@ -7,7 +7,7 @@ define @ceil_nxv1f16( %x) strictfp { ; CHECK-LABEL: ceil_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) @@ -15,6 +15,7 @@ define @ceil_nxv1f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -29,7 +30,7 @@ declare @llvm.experimental.constrained.ceil.nxv1f16( @ceil_nxv2f16( %x) strictfp { ; CHECK-LABEL: ceil_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -37,6 +38,7 @@ define @ceil_nxv2f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -51,7 +53,7 @@ declare @llvm.experimental.constrained.ceil.nxv2f16( @ceil_nxv4f16( %x) strictfp { ; CHECK-LABEL: ceil_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -59,6 +61,7 @@ define @ceil_nxv4f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -73,7 +76,7 @@ 
declare @llvm.experimental.constrained.ceil.nxv4f16( @ceil_nxv8f16( %x) strictfp { ; CHECK-LABEL: ceil_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) @@ -81,6 +84,7 @@ define @ceil_nxv8f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -95,7 +99,7 @@ declare @llvm.experimental.constrained.ceil.nxv8f16( @ceil_nxv16f16( %x) strictfp { ; CHECK-LABEL: ceil_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) @@ -103,6 +107,7 @@ define @ceil_nxv16f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -117,7 +122,7 @@ declare @llvm.experimental.constrained.ceil.nxv16f16( @ceil_nxv32f16( %x) strictfp { ; CHECK-LABEL: ceil_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) @@ -125,6 +130,7 @@ define @ceil_nxv32f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -139,7 +145,7 @@ declare 
@llvm.experimental.constrained.ceil.nxv32f16( @ceil_nxv1f32( %x) strictfp { ; CHECK-LABEL: ceil_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -147,6 +153,7 @@ define @ceil_nxv1f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -161,7 +168,7 @@ declare @llvm.experimental.constrained.ceil.nxv1f32( @ceil_nxv2f32( %x) strictfp { ; CHECK-LABEL: ceil_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -169,6 +176,7 @@ define @ceil_nxv2f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -183,7 +191,7 @@ declare @llvm.experimental.constrained.ceil.nxv2f32( @ceil_nxv4f32( %x) strictfp { ; CHECK-LABEL: ceil_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 @@ -191,6 +199,7 @@ define @ceil_nxv4f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -205,7 +214,7 @@ declare 
@llvm.experimental.constrained.ceil.nxv4f32( @ceil_nxv8f32( %x) strictfp { ; CHECK-LABEL: ceil_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -213,6 +222,7 @@ define @ceil_nxv8f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -227,7 +237,7 @@ declare @llvm.experimental.constrained.ceil.nxv8f32( @ceil_nxv16f32( %x) strictfp { ; CHECK-LABEL: ceil_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 @@ -235,6 +245,7 @@ define @ceil_nxv16f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -249,7 +260,7 @@ declare @llvm.experimental.constrained.ceil.nxv16f32( @ceil_nxv1f64( %x) strictfp { ; CHECK-LABEL: ceil_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) @@ -257,6 +268,7 @@ define @ceil_nxv1f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -271,7 +283,7 @@ declare 
@llvm.experimental.constrained.ceil.nxv1f64( @ceil_nxv2f64( %x) strictfp { ; CHECK-LABEL: ceil_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) @@ -279,6 +291,7 @@ define @ceil_nxv2f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -293,7 +306,7 @@ declare @llvm.experimental.constrained.ceil.nxv2f64( @ceil_nxv4f64( %x) strictfp { ; CHECK-LABEL: ceil_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) @@ -301,6 +314,7 @@ define @ceil_nxv4f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -315,7 +329,7 @@ declare @llvm.experimental.constrained.ceil.nxv4f64( @ceil_nxv8f64( %x) strictfp { ; CHECK-LABEL: ceil_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) @@ -323,6 +337,7 @@ define @ceil_nxv8f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t diff --git 
a/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll index 31a9453204457..1df452d8641c5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll @@ -7,7 +7,7 @@ define @floor_nxv1f16( %x) strictfp { ; CHECK-LABEL: floor_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) @@ -15,6 +15,7 @@ define @floor_nxv1f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -29,7 +30,7 @@ declare @llvm.experimental.constrained.floor.nxv1f16( @floor_nxv2f16( %x) strictfp { ; CHECK-LABEL: floor_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -37,6 +38,7 @@ define @floor_nxv2f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -51,7 +53,7 @@ declare @llvm.experimental.constrained.floor.nxv2f16( @floor_nxv4f16( %x) strictfp { ; CHECK-LABEL: floor_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -59,6 +61,7 @@ define @floor_nxv4f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, 
v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -73,7 +76,7 @@ declare @llvm.experimental.constrained.floor.nxv4f16( @floor_nxv8f16( %x) strictfp { ; CHECK-LABEL: floor_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) @@ -81,6 +84,7 @@ define @floor_nxv8f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -95,7 +99,7 @@ declare @llvm.experimental.constrained.floor.nxv8f16( @floor_nxv16f16( %x) strictfp { ; CHECK-LABEL: floor_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) @@ -103,6 +107,7 @@ define @floor_nxv16f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -117,7 +122,7 @@ declare @llvm.experimental.constrained.floor.nxv16f16( @floor_nxv32f16( %x) strictfp { ; CHECK-LABEL: floor_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) @@ -125,6 +130,7 @@ define @floor_nxv32f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v16, v8 ; 
CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -139,7 +145,7 @@ declare @llvm.experimental.constrained.floor.nxv32f16( @floor_nxv1f32( %x) strictfp { ; CHECK-LABEL: floor_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -147,6 +153,7 @@ define @floor_nxv1f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -161,7 +168,7 @@ declare @llvm.experimental.constrained.floor.nxv1f32( @floor_nxv2f32( %x) strictfp { ; CHECK-LABEL: floor_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -169,6 +176,7 @@ define @floor_nxv2f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -183,7 +191,7 @@ declare @llvm.experimental.constrained.floor.nxv2f32( @floor_nxv4f32( %x) strictfp { ; CHECK-LABEL: floor_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 @@ -191,6 +199,7 @@ define @floor_nxv4f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, 
v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -205,7 +214,7 @@ declare @llvm.experimental.constrained.floor.nxv4f32( @floor_nxv8f32( %x) strictfp { ; CHECK-LABEL: floor_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -213,6 +222,7 @@ define @floor_nxv8f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -227,7 +237,7 @@ declare @llvm.experimental.constrained.floor.nxv8f32( @floor_nxv16f32( %x) strictfp { ; CHECK-LABEL: floor_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 @@ -235,6 +245,7 @@ define @floor_nxv16f32( %x) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -249,7 +260,7 @@ declare @llvm.experimental.constrained.floor.nxv16f32( @floor_nxv1f64( %x) strictfp { ; CHECK-LABEL: floor_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) @@ -257,6 +268,7 @@ define @floor_nxv1f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; 
CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -271,7 +283,7 @@ declare @llvm.experimental.constrained.floor.nxv1f64( @floor_nxv2f64( %x) strictfp { ; CHECK-LABEL: floor_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) @@ -279,6 +291,7 @@ define @floor_nxv2f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -293,7 +306,7 @@ declare @llvm.experimental.constrained.floor.nxv2f64( @floor_nxv4f64( %x) strictfp { ; CHECK-LABEL: floor_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) @@ -301,6 +314,7 @@ define @floor_nxv4f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -315,7 +329,7 @@ declare @llvm.experimental.constrained.floor.nxv4f64( @floor_nxv8f64( %x) strictfp { ; CHECK-LABEL: floor_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) @@ -323,6 +337,7 @@ define @floor_nxv8f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 
; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll index 1e93a73ede5d6..404fb72b8abe9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll @@ -7,7 +7,7 @@ define <1 x half> @ceil_v1f16(<1 x half> %x) strictfp { ; CHECK-LABEL: ceil_v1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) @@ -15,6 +15,7 @@ define <1 x half> @ceil_v1f16(<1 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -29,7 +30,7 @@ declare <1 x half> @llvm.experimental.constrained.ceil.v1f16(<1 x half>, metadat define <2 x half> @ceil_v2f16(<2 x half> %x) strictfp { ; CHECK-LABEL: ceil_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -37,6 +38,7 @@ define <2 x half> @ceil_v2f16(<2 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -51,7 +53,7 @@ declare <2 x half> @llvm.experimental.constrained.ceil.v2f16(<2 x half>, metadat 
define <4 x half> @ceil_v4f16(<4 x half> %x) strictfp { ; CHECK-LABEL: ceil_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -59,6 +61,7 @@ define <4 x half> @ceil_v4f16(<4 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -73,7 +76,7 @@ declare <4 x half> @llvm.experimental.constrained.ceil.v4f16(<4 x half>, metadat define <8 x half> @ceil_v8f16(<8 x half> %x) strictfp { ; CHECK-LABEL: ceil_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) @@ -81,6 +84,7 @@ define <8 x half> @ceil_v8f16(<8 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -95,7 +99,7 @@ declare <8 x half> @llvm.experimental.constrained.ceil.v8f16(<8 x half>, metadat define <16 x half> @ceil_v16f16(<16 x half> %x) strictfp { ; CHECK-LABEL: ceil_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) @@ -103,6 +107,7 @@ define <16 x half> @ceil_v16f16(<16 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; 
CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -118,7 +123,7 @@ define <32 x half> @ceil_v32f16(<32 x half> %x) strictfp { ; CHECK-LABEL: ceil_v32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) @@ -126,6 +131,7 @@ define <32 x half> @ceil_v32f16(<32 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -140,7 +146,7 @@ declare <32 x half> @llvm.experimental.constrained.ceil.v32f16(<32 x half>, meta define <1 x float> @ceil_v1f32(<1 x float> %x) strictfp { ; CHECK-LABEL: ceil_v1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -148,6 +154,7 @@ define <1 x float> @ceil_v1f32(<1 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -162,7 +169,7 @@ declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metad define <2 x float> @ceil_v2f32(<2 x float> %x) strictfp { ; CHECK-LABEL: ceil_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -170,6 +177,7 @@ define <2 x float> @ceil_v2f32(<2 x float> %x) strictfp { ; 
CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -184,7 +192,7 @@ declare <2 x float> @llvm.experimental.constrained.ceil.v2f32(<2 x float>, metad define <4 x float> @ceil_v4f32(<4 x float> %x) strictfp { ; CHECK-LABEL: ceil_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -192,6 +200,7 @@ define <4 x float> @ceil_v4f32(<4 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -206,7 +215,7 @@ declare <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float>, metad define <8 x float> @ceil_v8f32(<8 x float> %x) strictfp { ; CHECK-LABEL: ceil_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 @@ -214,6 +223,7 @@ define <8 x float> @ceil_v8f32(<8 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -228,7 +238,7 @@ declare <8 x float> @llvm.experimental.constrained.ceil.v8f32(<8 x float>, metad define <16 x float> @ceil_v16f32(<16 x float> %x) strictfp { ; CHECK-LABEL: ceil_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m4, 
ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -236,6 +246,7 @@ define <16 x float> @ceil_v16f32(<16 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -250,7 +261,7 @@ declare <16 x float> @llvm.experimental.constrained.ceil.v16f32(<16 x float>, me define <1 x double> @ceil_v1f64(<1 x double> %x) strictfp { ; CHECK-LABEL: ceil_v1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) @@ -258,6 +269,7 @@ define <1 x double> @ceil_v1f64(<1 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -272,7 +284,7 @@ declare <1 x double> @llvm.experimental.constrained.ceil.v1f64(<1 x double>, met define <2 x double> @ceil_v2f64(<2 x double> %x) strictfp { ; CHECK-LABEL: ceil_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) @@ -280,6 +292,7 @@ define <2 x double> @ceil_v2f64(<2 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -294,7 +307,7 @@ declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 
x double>, met define <4 x double> @ceil_v4f64(<4 x double> %x) strictfp { ; CHECK-LABEL: ceil_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) @@ -302,6 +315,7 @@ define <4 x double> @ceil_v4f64(<4 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -316,7 +330,7 @@ declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, met define <8 x double> @ceil_v8f64(<8 x double> %x) strictfp { ; CHECK-LABEL: ceil_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) @@ -324,6 +338,7 @@ define <8 x double> @ceil_v8f64(<8 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll index 53018939fc6eb..2319aab370d2d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll @@ -7,7 +7,7 @@ define <1 x half> @floor_v1f16(<1 x half> %x) strictfp { ; CHECK-LABEL: floor_v1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, 
v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) @@ -15,6 +15,7 @@ define <1 x half> @floor_v1f16(<1 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -29,7 +30,7 @@ declare <1 x half> @llvm.experimental.constrained.floor.v1f16(<1 x half>, metada define <2 x half> @floor_v2f16(<2 x half> %x) strictfp { ; CHECK-LABEL: floor_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -37,6 +38,7 @@ define <2 x half> @floor_v2f16(<2 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -51,7 +53,7 @@ declare <2 x half> @llvm.experimental.constrained.floor.v2f16(<2 x half>, metada define <4 x half> @floor_v4f16(<4 x half> %x) strictfp { ; CHECK-LABEL: floor_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -59,6 +61,7 @@ define <4 x half> @floor_v4f16(<4 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -73,7 +76,7 @@ declare <4 x half> @llvm.experimental.constrained.floor.v4f16(<4 x half>, metada define <8 x half> @floor_v8f16(<8 x 
half> %x) strictfp { ; CHECK-LABEL: floor_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) @@ -81,6 +84,7 @@ define <8 x half> @floor_v8f16(<8 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -95,7 +99,7 @@ declare <8 x half> @llvm.experimental.constrained.floor.v8f16(<8 x half>, metada define <16 x half> @floor_v16f16(<16 x half> %x) strictfp { ; CHECK-LABEL: floor_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) @@ -103,6 +107,7 @@ define <16 x half> @floor_v16f16(<16 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -118,7 +123,7 @@ define <32 x half> @floor_v32f16(<32 x half> %x) strictfp { ; CHECK-LABEL: floor_v32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) @@ -126,6 +131,7 @@ define <32 x half> @floor_v32f16(<32 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: 
vfcvt.f.x.v v12, v12, v0.t @@ -140,7 +146,7 @@ declare <32 x half> @llvm.experimental.constrained.floor.v32f16(<32 x half>, met define <1 x float> @floor_v1f32(<1 x float> %x) strictfp { ; CHECK-LABEL: floor_v1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -148,6 +154,7 @@ define <1 x float> @floor_v1f32(<1 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -162,7 +169,7 @@ declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, meta define <2 x float> @floor_v2f32(<2 x float> %x) strictfp { ; CHECK-LABEL: floor_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -170,6 +177,7 @@ define <2 x float> @floor_v2f32(<2 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -184,7 +192,7 @@ declare <2 x float> @llvm.experimental.constrained.floor.v2f32(<2 x float>, meta define <4 x float> @floor_v4f32(<4 x float> %x) strictfp { ; CHECK-LABEL: floor_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -192,6 +200,7 @@ define <4 x float> @floor_v4f32(<4 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x 
fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -206,7 +215,7 @@ declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, meta define <8 x float> @floor_v8f32(<8 x float> %x) strictfp { ; CHECK-LABEL: floor_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 @@ -214,6 +223,7 @@ define <8 x float> @floor_v8f32(<8 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -228,7 +238,7 @@ declare <8 x float> @llvm.experimental.constrained.floor.v8f32(<8 x float>, meta define <16 x float> @floor_v16f32(<16 x float> %x) strictfp { ; CHECK-LABEL: floor_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -236,6 +246,7 @@ define <16 x float> @floor_v16f32(<16 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -250,7 +261,7 @@ declare <16 x float> @llvm.experimental.constrained.floor.v16f32(<16 x float>, m define <1 x double> @floor_v1f64(<1 x double> %x) strictfp { ; CHECK-LABEL: floor_v1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, 
mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) @@ -258,6 +269,7 @@ define <1 x double> @floor_v1f64(<1 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -272,7 +284,7 @@ declare <1 x double> @llvm.experimental.constrained.floor.v1f64(<1 x double>, me define <2 x double> @floor_v2f64(<2 x double> %x) strictfp { ; CHECK-LABEL: floor_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) @@ -280,6 +292,7 @@ define <2 x double> @floor_v2f64(<2 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -294,7 +307,7 @@ declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, me define <4 x double> @floor_v4f64(<4 x double> %x) strictfp { ; CHECK-LABEL: floor_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) @@ -302,6 +315,7 @@ define <4 x double> @floor_v4f64(<4 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -316,7 +330,7 @@ declare <4 x double> 
@llvm.experimental.constrained.floor.v4f64(<4 x double>, me define <8 x double> @floor_v8f64(<8 x double> %x) strictfp { ; CHECK-LABEL: floor_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) @@ -324,6 +338,7 @@ define <8 x double> @floor_v8f64(<8 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll index 9e9a8b8a4b644..719dd52494284 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll @@ -9,7 +9,7 @@ declare <2 x half> @llvm.experimental.constrained.nearbyint.v2f16(<2 x half>, me define <2 x half> @nearbyint_v2f16(<2 x half> %v) strictfp { ; CHECK-LABEL: nearbyint_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) @@ -17,6 +17,7 @@ define <2 x half> @nearbyint_v2f16(<2 x half> %v) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -32,7 +33,7 @@ declare <4 x half> @llvm.experimental.constrained.nearbyint.v4f16(<4 x half>, me define <4 x half> @nearbyint_v4f16(<4 x half> %v) strictfp 
{ ; CHECK-LABEL: nearbyint_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -40,6 +41,7 @@ define <4 x half> @nearbyint_v4f16(<4 x half> %v) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu @@ -55,7 +57,7 @@ declare <8 x half> @llvm.experimental.constrained.nearbyint.v8f16(<8 x half>, me define <8 x half> @nearbyint_v8f16(<8 x half> %v) strictfp { ; CHECK-LABEL: nearbyint_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -63,6 +65,7 @@ define <8 x half> @nearbyint_v8f16(<8 x half> %v) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -78,7 +81,7 @@ declare <16 x half> @llvm.experimental.constrained.nearbyint.v16f16(<16 x half>, define <16 x half> @nearbyint_v16f16(<16 x half> %v) strictfp { ; CHECK-LABEL: nearbyint_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) @@ -86,6 +89,7 @@ define <16 x half> @nearbyint_v16f16(<16 x half> %v) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli 
zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu @@ -102,7 +106,7 @@ define <32 x half> @nearbyint_v32f16(<32 x half> %v) strictfp { ; CHECK-LABEL: nearbyint_v32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) @@ -110,6 +114,7 @@ define <32 x half> @nearbyint_v32f16(<32 x half> %v) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu @@ -125,7 +130,7 @@ declare <2 x float> @llvm.experimental.constrained.nearbyint.v2f32(<2 x float>, define <2 x float> @nearbyint_v2f32(<2 x float> %v) strictfp { ; CHECK-LABEL: nearbyint_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -133,6 +138,7 @@ define <2 x float> @nearbyint_v2f32(<2 x float> %v) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu @@ -148,7 +154,7 @@ declare <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float>, define <4 x float> @nearbyint_v4f32(<4 x float> %v) strictfp { ; CHECK-LABEL: nearbyint_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 
; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -156,6 +162,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %v) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu @@ -171,7 +178,7 @@ declare <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(<8 x float>, define <8 x float> @nearbyint_v8f32(<8 x float> %v) strictfp { ; CHECK-LABEL: nearbyint_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 @@ -179,6 +186,7 @@ define <8 x float> @nearbyint_v8f32(<8 x float> %v) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -194,7 +202,7 @@ declare <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float define <16 x float> @nearbyint_v16f32(<16 x float> %v) strictfp { ; CHECK-LABEL: nearbyint_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -202,6 +210,7 @@ define <16 x float> @nearbyint_v16f32(<16 x float> %v) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu @@ -217,7 +226,7 @@ 
declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> define <2 x double> @nearbyint_v2f64(<2 x double> %v) strictfp { ; CHECK-LABEL: nearbyint_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI9_0)(a0) @@ -225,6 +234,7 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %v) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -240,7 +250,7 @@ declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double> define <4 x double> @nearbyint_v4f64(<4 x double> %v) strictfp { ; CHECK-LABEL: nearbyint_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI10_0)(a0) @@ -248,6 +258,7 @@ define <4 x double> @nearbyint_v4f64(<4 x double> %v) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -263,7 +274,7 @@ declare <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> define <8 x double> @nearbyint_v8f64(<8 x double> %v) strictfp { ; CHECK-LABEL: nearbyint_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) @@ -271,6 +282,7 @@ define <8 
x double> @nearbyint_v8f64(<8 x double> %v) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll index f189354237ee3..e855d9504ff40 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll @@ -9,7 +9,7 @@ define <1 x half> @round_v1f16(<1 x half> %x) strictfp { ; CHECK-LABEL: round_v1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) @@ -17,6 +17,7 @@ define <1 x half> @round_v1f16(<1 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -31,7 +32,7 @@ declare <1 x half> @llvm.experimental.constrained.round.v1f16(<1 x half>, metada define <2 x half> @round_v2f16(<2 x half> %x) strictfp { ; CHECK-LABEL: round_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -39,6 +40,7 @@ define <2 x half> @round_v2f16(<2 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; 
CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -53,7 +55,7 @@ declare <2 x half> @llvm.experimental.constrained.round.v2f16(<2 x half>, metada define <4 x half> @round_v4f16(<4 x half> %x) strictfp { ; CHECK-LABEL: round_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -61,6 +63,7 @@ define <4 x half> @round_v4f16(<4 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -75,7 +78,7 @@ declare <4 x half> @llvm.experimental.constrained.round.v4f16(<4 x half>, metada define <8 x half> @round_v8f16(<8 x half> %x) strictfp { ; CHECK-LABEL: round_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) @@ -83,6 +86,7 @@ define <8 x half> @round_v8f16(<8 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -97,7 +101,7 @@ declare <8 x half> @llvm.experimental.constrained.round.v8f16(<8 x half>, metada define <16 x half> @round_v16f16(<16 x half> %x) strictfp { ; CHECK-LABEL: round_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) @@ -105,6 +109,7 @@ define <16 x half> @round_v16f16(<16 x half> 
%x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -120,7 +125,7 @@ define <32 x half> @round_v32f16(<32 x half> %x) strictfp { ; CHECK-LABEL: round_v32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) @@ -128,6 +133,7 @@ define <32 x half> @round_v32f16(<32 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -142,7 +148,7 @@ declare <32 x half> @llvm.experimental.constrained.round.v32f16(<32 x half>, met define <1 x float> @round_v1f32(<1 x float> %x) strictfp { ; CHECK-LABEL: round_v1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -150,6 +156,7 @@ define <1 x float> @round_v1f32(<1 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -164,7 +171,7 @@ declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, meta define <2 x float> @round_v2f32(<2 x float> %x) strictfp { ; CHECK-LABEL: round_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: 
vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -172,6 +179,7 @@ define <2 x float> @round_v2f32(<2 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -186,7 +194,7 @@ declare <2 x float> @llvm.experimental.constrained.round.v2f32(<2 x float>, meta define <4 x float> @round_v4f32(<4 x float> %x) strictfp { ; CHECK-LABEL: round_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -194,6 +202,7 @@ define <4 x float> @round_v4f32(<4 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -208,7 +217,7 @@ declare <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float>, meta define <8 x float> @round_v8f32(<8 x float> %x) strictfp { ; CHECK-LABEL: round_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 @@ -216,6 +225,7 @@ define <8 x float> @round_v8f32(<8 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -230,7 +240,7 @@ declare <8 x float> @llvm.experimental.constrained.round.v8f32(<8 x float>, meta define <16 x float> 
@round_v16f32(<16 x float> %x) strictfp { ; CHECK-LABEL: round_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -238,6 +248,7 @@ define <16 x float> @round_v16f32(<16 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -252,7 +263,7 @@ declare <16 x float> @llvm.experimental.constrained.round.v16f32(<16 x float>, m define <1 x double> @round_v1f64(<1 x double> %x) strictfp { ; CHECK-LABEL: round_v1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) @@ -260,6 +271,7 @@ define <1 x double> @round_v1f64(<1 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -274,7 +286,7 @@ declare <1 x double> @llvm.experimental.constrained.round.v1f64(<1 x double>, me define <2 x double> @round_v2f64(<2 x double> %x) strictfp { ; CHECK-LABEL: round_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) @@ -282,6 +294,7 @@ define <2 x double> @round_v2f64(<2 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, 
ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -296,7 +309,7 @@ declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, me define <4 x double> @round_v4f64(<4 x double> %x) strictfp { ; CHECK-LABEL: round_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) @@ -304,6 +317,7 @@ define <4 x double> @round_v4f64(<4 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -318,7 +332,7 @@ declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, me define <8 x double> @round_v8f64(<8 x double> %x) strictfp { ; CHECK-LABEL: round_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) @@ -326,6 +340,7 @@ define <8 x double> @round_v8f64(<8 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll index 11920c7c31c98..9976cd2a8ab29 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll @@ -9,7 +9,7 @@ define <1 x half> 
@roundeven_v1f16(<1 x half> %x) strictfp { ; CHECK-LABEL: roundeven_v1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) @@ -17,6 +17,7 @@ define <1 x half> @roundeven_v1f16(<1 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -31,7 +32,7 @@ declare <1 x half> @llvm.experimental.constrained.roundeven.v1f16(<1 x half>, me define <2 x half> @roundeven_v2f16(<2 x half> %x) strictfp { ; CHECK-LABEL: roundeven_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -39,6 +40,7 @@ define <2 x half> @roundeven_v2f16(<2 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -53,7 +55,7 @@ declare <2 x half> @llvm.experimental.constrained.roundeven.v2f16(<2 x half>, me define <4 x half> @roundeven_v4f16(<4 x half> %x) strictfp { ; CHECK-LABEL: roundeven_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -61,6 +63,7 @@ define <4 x half> @roundeven_v4f16(<4 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, 
mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -75,7 +78,7 @@ declare <4 x half> @llvm.experimental.constrained.roundeven.v4f16(<4 x half>, me define <8 x half> @roundeven_v8f16(<8 x half> %x) strictfp { ; CHECK-LABEL: roundeven_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) @@ -83,6 +86,7 @@ define <8 x half> @roundeven_v8f16(<8 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -97,7 +101,7 @@ declare <8 x half> @llvm.experimental.constrained.roundeven.v8f16(<8 x half>, me define <16 x half> @roundeven_v16f16(<16 x half> %x) strictfp { ; CHECK-LABEL: roundeven_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) @@ -105,6 +109,7 @@ define <16 x half> @roundeven_v16f16(<16 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -120,7 +125,7 @@ define <32 x half> @roundeven_v32f16(<32 x half> %x) strictfp { ; CHECK-LABEL: roundeven_v32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) @@ -128,6 +133,7 @@ 
define <32 x half> @roundeven_v32f16(<32 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -142,7 +148,7 @@ declare <32 x half> @llvm.experimental.constrained.roundeven.v32f16(<32 x half>, define <1 x float> @roundeven_v1f32(<1 x float> %x) strictfp { ; CHECK-LABEL: roundeven_v1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -150,6 +156,7 @@ define <1 x float> @roundeven_v1f32(<1 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -164,7 +171,7 @@ declare <1 x float> @llvm.experimental.constrained.roundeven.v1f32(<1 x float>, define <2 x float> @roundeven_v2f32(<2 x float> %x) strictfp { ; CHECK-LABEL: roundeven_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -172,6 +179,7 @@ define <2 x float> @roundeven_v2f32(<2 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -186,7 +194,7 @@ declare <2 x float> @llvm.experimental.constrained.roundeven.v2f32(<2 x float>, define <4 x float> @roundeven_v4f32(<4 x float> %x) strictfp { ; CHECK-LABEL: roundeven_v4f32: ; CHECK: 
# %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -194,6 +202,7 @@ define <4 x float> @roundeven_v4f32(<4 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -208,7 +217,7 @@ declare <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x float>, define <8 x float> @roundeven_v8f32(<8 x float> %x) strictfp { ; CHECK-LABEL: roundeven_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 @@ -216,6 +225,7 @@ define <8 x float> @roundeven_v8f32(<8 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -230,7 +240,7 @@ declare <8 x float> @llvm.experimental.constrained.roundeven.v8f32(<8 x float>, define <16 x float> @roundeven_v16f32(<16 x float> %x) strictfp { ; CHECK-LABEL: roundeven_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -238,6 +248,7 @@ define <16 x float> @roundeven_v16f32(<16 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; 
CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -252,7 +263,7 @@ declare <16 x float> @llvm.experimental.constrained.roundeven.v16f32(<16 x float define <1 x double> @roundeven_v1f64(<1 x double> %x) strictfp { ; CHECK-LABEL: roundeven_v1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) @@ -260,6 +271,7 @@ define <1 x double> @roundeven_v1f64(<1 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -274,7 +286,7 @@ declare <1 x double> @llvm.experimental.constrained.roundeven.v1f64(<1 x double> define <2 x double> @roundeven_v2f64(<2 x double> %x) strictfp { ; CHECK-LABEL: roundeven_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) @@ -282,6 +294,7 @@ define <2 x double> @roundeven_v2f64(<2 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -296,7 +309,7 @@ declare <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double> define <4 x double> @roundeven_v4f64(<4 x double> %x) strictfp { ; CHECK-LABEL: roundeven_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) @@ -304,6 +317,7 @@ define 
<4 x double> @roundeven_v4f64(<4 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -318,7 +332,7 @@ declare <4 x double> @llvm.experimental.constrained.roundeven.v4f64(<4 x double> define <8 x double> @roundeven_v8f64(<8 x double> %x) strictfp { ; CHECK-LABEL: roundeven_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) @@ -326,6 +340,7 @@ define <8 x double> @roundeven_v8f64(<8 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll index f16581444afca..eac26451d5a8c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll @@ -7,13 +7,14 @@ define <1 x half> @trunc_v1f16(<1 x half> %x) strictfp { ; CHECK-LABEL: trunc_v1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: 
vsetvli zero, zero, e16, mf4, ta, mu @@ -27,13 +28,14 @@ declare <1 x half> @llvm.experimental.constrained.trunc.v1f16(<1 x half>, metada define <2 x half> @trunc_v2f16(<2 x half> %x) strictfp { ; CHECK-LABEL: trunc_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -47,13 +49,14 @@ declare <2 x half> @llvm.experimental.constrained.trunc.v2f16(<2 x half>, metada define <4 x half> @trunc_v4f16(<4 x half> %x) strictfp { ; CHECK-LABEL: trunc_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu @@ -67,13 +70,14 @@ declare <4 x half> @llvm.experimental.constrained.trunc.v4f16(<4 x half>, metada define <8 x half> @trunc_v8f16(<8 x half> %x) strictfp { ; CHECK-LABEL: trunc_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, 
e16, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -87,13 +91,14 @@ declare <8 x half> @llvm.experimental.constrained.trunc.v8f16(<8 x half>, metada define <16 x half> @trunc_v16f16(<16 x half> %x) strictfp { ; CHECK-LABEL: trunc_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu @@ -108,13 +113,14 @@ define <32 x half> @trunc_v32f16(<32 x half> %x) strictfp { ; CHECK-LABEL: trunc_v32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu @@ -128,13 +134,14 @@ declare <32 x half> @llvm.experimental.constrained.trunc.v32f16(<32 x half>, met define <1 x float> @trunc_v1f32(<1 x float> %x) strictfp { ; CHECK-LABEL: trunc_v1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: 
vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu @@ -148,13 +155,14 @@ declare <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float>, meta define <2 x float> @trunc_v2f32(<2 x float> %x) strictfp { ; CHECK-LABEL: trunc_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu @@ -168,13 +176,14 @@ declare <2 x float> @llvm.experimental.constrained.trunc.v2f32(<2 x float>, meta define <4 x float> @trunc_v4f32(<4 x float> %x) strictfp { ; CHECK-LABEL: trunc_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu @@ -188,13 +197,14 @@ declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, meta define <8 x float> @trunc_v8f32(<8 x float> %x) strictfp { ; CHECK-LABEL: trunc_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; 
CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -208,13 +218,14 @@ declare <8 x float> @llvm.experimental.constrained.trunc.v8f32(<8 x float>, meta define <16 x float> @trunc_v16f32(<16 x float> %x) strictfp { ; CHECK-LABEL: trunc_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu @@ -228,13 +239,14 @@ declare <16 x float> @llvm.experimental.constrained.trunc.v16f32(<16 x float>, m define <1 x double> @trunc_v1f64(<1 x double> %x) strictfp { ; CHECK-LABEL: trunc_v1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -248,13 +260,14 @@ declare <1 x double> @llvm.experimental.constrained.trunc.v1f64(<1 x double>, me define <2 x double> @trunc_v2f64(<2 x double> %x) strictfp { ; CHECK-LABEL: trunc_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; 
CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -268,13 +281,14 @@ declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, me define <4 x double> @trunc_v4f64(<4 x double> %x) strictfp { ; CHECK-LABEL: trunc_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -288,13 +302,14 @@ declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, me define <8 x double> @trunc_v8f64(<8 x double> %x) strictfp { ; CHECK-LABEL: trunc_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 79c36a629465d..f4d7074c7f6b2 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -3459,6 +3459,8 @@ define void @mulhu_v4i64(ptr %x) { ; RV64-NEXT: lui a1, %hi(.LCPI184_0) ; RV64-NEXT: addi a1, a1, %lo(.LCPI184_0) ; RV64-NEXT: vle64.v v10, (a1) +; RV64-NEXT: vmulhu.vv v10, v8, v10 +; RV64-NEXT: vsub.vv v8, v8, v10 ; RV64-NEXT: li a1, -1 ; RV64-NEXT: slli a1, a1, 63 ; RV64-NEXT: vmv.s.x v12, a1 @@ -3466,8 +3468,6 @@ define void @mulhu_v4i64(ptr %x) { ; RV64-NEXT: vsetivli zero, 3, e64, m2, tu, ma ; RV64-NEXT: vslideup.vi v14, v12, 2 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vmulhu.vv v10, v8, v10 -; RV64-NEXT: vsub.vv v8, v8, v10 ; RV64-NEXT: vmulhu.vv v8, v8, v14 ; RV64-NEXT: vadd.vv v8, v8, v10 ; RV64-NEXT: lui a1, 12320 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index 178a920169ad9..bc3e135a588a6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -159,17 +159,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 82 +; RV32-NEXT: li a3, 80 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd2, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 82 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 80 * vlenb ; RV32-NEXT: addi a3, a1, 256 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vle32.v v16, (a3) ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 57 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 6 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; 
RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill @@ -177,26 +176,26 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vslideup.vi v8, v16, 4 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 41 +; RV32-NEXT: li a5, 40 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: lui a4, 12 -; RV32-NEXT: vmv.s.x v1, a4 +; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV32-NEXT: vslidedown.vi v16, v16, 16 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a5, a4, 6 -; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: li a5, 56 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vmv1r.v v3, v0 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vslideup.vi v8, v16, 10, v0.t ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 45 +; RV32-NEXT: li a5, 44 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 @@ -206,8 +205,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: vle16.v v8, (a4) ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a5, a4, 5 -; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: slli a4, a4, 5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill @@ -216,21 +214,21 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: lui a5, 1 ; RV32-NEXT: vle16.v v8, (a4) ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a6, 25 +; RV32-NEXT: li a6, 24 ; RV32-NEXT: mul a4, a4, a6 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vle32.v 
v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 73 +; RV32-NEXT: li a4, 72 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vle32.v v24, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 48 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -238,27 +236,26 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: addi a1, a5, -64 ; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: li a3, 36 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v16, v8, v4 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 24 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v16, v24, v8, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 +; RV32-NEXT: li a3, 44 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -266,259 +263,257 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 +; RV32-NEXT: li a3, 44 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli 
a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vslideup.vi v12, v8, 2 +; RV32-NEXT: vmv1r.v v8, v3 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v3 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 56 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vslideup.vi v12, v16, 8, v0.t -; RV32-NEXT: vmv.v.v v20, v12 ; RV32-NEXT: lui a1, %hi(.LCPI6_2) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_2) ; RV32-NEXT: lui a3, %hi(.LCPI6_3) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_3) -; RV32-NEXT: lui a4, %hi(.LCPI6_4) ; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; RV32-NEXT: vle16.v v4, (a1) -; RV32-NEXT: vle16.v v16, (a3) -; RV32-NEXT: addi a1, a4, %lo(.LCPI6_4) +; RV32-NEXT: vle16.v v0, (a1) +; RV32-NEXT: vle16.v v4, (a3) +; RV32-NEXT: lui a1, %hi(.LCPI6_4) +; RV32-NEXT: addi a1, a1, %lo(.LCPI6_4) ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV32-NEXT: vle16.v v2, (a1) +; RV32-NEXT: vle16.v v10, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 73 +; RV32-NEXT: li a3, 72 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: vrgatherei16.vv v24, v8, v4 +; RV32-NEXT: vrgatherei16.vv v24, v16, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: li a3, 36 ; RV32-NEXT: mul 
a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 48 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v24, v8, v16, v0.t +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v24, v16, v4, v0.t ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v20, v24 +; RV32-NEXT: vmv.v.v v12, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: li a3, 36 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vrgatherei16.vv v16, v24, v2 -; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vrgatherei16.vv v12, v24, v10 +; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 56 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v16, v8, 6, v0.t +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v12, v24, 6, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded 
Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_5) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_5) ; RV32-NEXT: lui a3, %hi(.LCPI6_6) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_6) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: vle16.v v16, (a1) -; RV32-NEXT: vle16.v v4, (a3) -; RV32-NEXT: li a1, 960 -; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: vle16.v v12, (a1) +; RV32-NEXT: vle16.v v8, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 13 +; RV32-NEXT: li a3, 12 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: li a1, 960 +; RV32-NEXT: vmv.s.x v8, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 73 +; RV32-NEXT: li a3, 72 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v24, v16 +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v24, v0, v12 +; RV32-NEXT: vmv1r.v v3, v8 +; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 12 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v4, v0.t +; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v24, v16, v8, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 24 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_7) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_7) ; RV32-NEXT: lui a3, %hi(.LCPI6_8) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_8) -; RV32-NEXT: lui a4, %hi(.LCPI6_9) ; RV32-NEXT: 
vsetivli zero, 16, e16, m2, ta, ma ; RV32-NEXT: vle16.v v8, (a1) -; RV32-NEXT: addi a1, a4, %lo(.LCPI6_9) +; RV32-NEXT: lui a1, %hi(.LCPI6_9) +; RV32-NEXT: addi a1, a1, %lo(.LCPI6_9) ; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; RV32-NEXT: vle16.v v24, (a3) -; RV32-NEXT: vle16.v v28, (a1) +; RV32-NEXT: vle16.v v4, (a3) +; RV32-NEXT: vle16.v v12, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vrgatherei16.vv v4, v0, v8 +; RV32-NEXT: vrgatherei16.vv v12, v24, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 56 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v4, v8, 4, v0.t +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv4r.v v24, v16 +; RV32-NEXT: vslideup.vi v12, v16, 4, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 +; RV32-NEXT: li a3, 12 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 73 +; RV32-NEXT: li a3, 72 ; RV32-NEXT: mul a1, a1, a3 ; 
RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: vrgatherei16.vv v8, v0, v24 +; RV32-NEXT: vrgatherei16.vv v8, v16, v4 +; RV32-NEXT: vmv1r.v v0, v3 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 13 +; RV32-NEXT: li a3, 48 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v8, v16, v28, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 13 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_10) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_10) ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vle16.v v8, (a1) +; RV32-NEXT: vle16.v v12, (a1) ; RV32-NEXT: lui a1, 15 ; RV32-NEXT: vmv.s.x v3, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v12, v16, 6 +; RV32-NEXT: vslideup.vi v8, v16, 6 ; RV32-NEXT: vmv1r.v v0, v3 +; RV32-NEXT: vrgatherei16.vv v8, v24, v12, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 -; RV32-NEXT: add a1, a3, a1 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 -; 
RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_11) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_11) ; RV32-NEXT: lui a3, %hi(.LCPI6_12) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_12) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: vle16.v v8, (a1) -; RV32-NEXT: vle16.v v12, (a3) +; RV32-NEXT: vle16.v v24, (a1) +; RV32-NEXT: vle16.v v4, (a3) ; RV32-NEXT: li a1, 1008 ; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 73 +; RV32-NEXT: li a3, 72 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v24, v16, v8 +; RV32-NEXT: vrgatherei16.vv v8, v16, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 48 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v24, v16, v12, v0.t +; RV32-NEXT: vrgatherei16.vv v8, v16, v4, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 2 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_13) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_13) ; RV32-NEXT: lui a3, %hi(.LCPI6_14) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_14) -; RV32-NEXT: lui a4, %hi(.LCPI6_15) ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RV32-NEXT: vle16.v v20, (a1) -; 
RV32-NEXT: addi a1, a4, %lo(.LCPI6_15) +; RV32-NEXT: lui a1, %hi(.LCPI6_15) +; RV32-NEXT: addi a1, a1, %lo(.LCPI6_15) ; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; RV32-NEXT: vle16.v v24, (a3) ; RV32-NEXT: vle16.v v8, (a1) @@ -526,27 +521,26 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v0, v3 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 41 +; RV32-NEXT: li a3, 40 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 56 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vrgatherei16.vv v16, v8, v20, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 24 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -554,7 +548,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v20, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 73 +; RV32-NEXT: li a3, 72 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -562,12 +556,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: vrgatherei16.vv v8, v0, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: slli a1, a1, 3 ; 
RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 49 +; RV32-NEXT: li a2, 48 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -576,31 +570,28 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v8, v24, v4, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 21 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 13 +; RV32-NEXT: li a2, 12 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v24, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 57 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a2, a1, 2 -; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vmv.v.v v28, v0 ; RV32-NEXT: vmv.v.v v16, v8 ; RV32-NEXT: addi a1, a0, 320 @@ -614,21 +605,21 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vse32.v v20, (a1) ; RV32-NEXT: addi a1, a0, 64 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li 
a3, 37 +; RV32-NEXT: li a3, 36 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 45 +; RV32-NEXT: li a2, 44 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 82 +; RV32-NEXT: li a1, 80 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll index 1748315186936..7608349ef7aef 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll @@ -549,20 +549,20 @@ define <128 x i1> @buildvec_mask_v128i1() { define <128 x i1> @buildvec_mask_optsize_v128i1() optsize { ; CHECK-LABEL: buildvec_mask_optsize_v128i1: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI21_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI21_0) -; CHECK-NEXT: li a1, 128 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: li a0, 128 +; CHECK-NEXT: lui a1, %hi(.LCPI21_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI21_0) +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_optsize_v128i1: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: lui a0, %hi(.LCPI21_0) -; ZVE32F-NEXT: addi a0, a0, %lo(.LCPI21_0) -; ZVE32F-NEXT: li a1, 128 -; ZVE32F-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; ZVE32F-NEXT: vlm.v v0, (a0) +; ZVE32F-NEXT: li a0, 128 +; ZVE32F-NEXT: lui a1, %hi(.LCPI21_0) +; ZVE32F-NEXT: addi a1, a1, %lo(.LCPI21_0) +; ZVE32F-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; ZVE32F-NEXT: vlm.v v0, (a1) ; ZVE32F-NEXT: ret ret <128 x i1> } diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index db0969c85a8e2..69341981288b9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -13327,22 +13327,22 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) { define <8 x i16> @mgather_shuffle_vrgather(ptr %base) { ; RV32-LABEL: mgather_shuffle_vrgather: ; RV32: # %bb.0: +; RV32-NEXT: lui a1, %hi(.LCPI119_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI119_0) ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v9, (a0) -; RV32-NEXT: lui a0, %hi(.LCPI119_0) -; RV32-NEXT: addi a0, a0, %lo(.LCPI119_0) +; RV32-NEXT: vle16.v v9, (a1) ; RV32-NEXT: vle16.v v10, (a0) -; RV32-NEXT: vrgather.vv v8, v9, v10 +; RV32-NEXT: vrgather.vv v8, v10, v9 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_shuffle_vrgather: ; RV64V: # %bb.0: +; RV64V-NEXT: lui a1, %hi(.LCPI119_0) +; RV64V-NEXT: addi a1, a1, %lo(.LCPI119_0) ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64V-NEXT: vle16.v v9, (a0) -; RV64V-NEXT: lui a0, %hi(.LCPI119_0) -; RV64V-NEXT: addi a0, a0, %lo(.LCPI119_0) +; RV64V-NEXT: vle16.v v9, (a1) ; RV64V-NEXT: vle16.v v10, (a0) -; RV64V-NEXT: vrgather.vv v8, v9, v10 +; RV64V-NEXT: vrgather.vv v8, v10, v9 ; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_shuffle_vrgather: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll index d70ed2fb0e266..4b1f0beb48700 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll @@ -228,11 +228,11 @@ define <16 x i8> @reverse_v16i8(<16 x i8> %a) { define <32 x i8> @reverse_v32i8(<32 x i8> %a) { ; CHECK-LABEL: reverse_v32i8: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI12_0) -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: 
vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: lui a1, %hi(.LCPI12_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI12_0) +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v12, (a1) ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -243,11 +243,11 @@ define <32 x i8> @reverse_v32i8(<32 x i8> %a) { define <64 x i8> @reverse_v64i8(<64 x i8> %a) { ; CHECK-LABEL: reverse_v64i8: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI13_0) -; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: lui a1, %hi(.LCPI13_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI13_0) +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vle8.v v16, (a1) ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret @@ -323,11 +323,11 @@ define <16 x i16> @reverse_v16i16(<16 x i16> %a) { define <32 x i16> @reverse_v32i16(<32 x i16> %a) { ; CHECK-LABEL: reverse_v32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI19_0) -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: lui a1, %hi(.LCPI19_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI19_0) +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vle8.v v12, (a1) ; CHECK-NEXT: vsext.vf2 v16, v12 ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 @@ -520,11 +520,11 @@ define <16 x half> @reverse_v16f16(<16 x half> %a) { define <32 x half> @reverse_v32f16(<32 x half> %a) { ; CHECK-LABEL: reverse_v32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI34_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI34_0) -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: li a0, 
32 +; CHECK-NEXT: lui a1, %hi(.LCPI34_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI34_0) +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vle8.v v12, (a1) ; CHECK-NEXT: vsext.vf2 v16, v12 ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 @@ -820,33 +820,33 @@ define <6 x i64> @reverse_v6i64(<6 x i64> %a) { define <12 x i64> @reverse_v12i64(<12 x i64> %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_v12i64: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: lui a0, %hi(.LCPI46_0) -; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, %lo(.LCPI46_0) -; RV32-BITS-UNKNOWN-NEXT: li a1, 32 -; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vle16.v v24, (a0) +; RV32-BITS-UNKNOWN-NEXT: li a0, 32 +; RV32-BITS-UNKNOWN-NEXT: lui a1, %hi(.LCPI46_0) +; RV32-BITS-UNKNOWN-NEXT: addi a1, a1, %lo(.LCPI46_0) +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vle16.v v24, (a1) ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v16, v8, v24 ; RV32-BITS-UNKNOWN-NEXT: vmv.v.v v8, v16 ; RV32-BITS-UNKNOWN-NEXT: ret ; ; RV32-BITS-256-LABEL: reverse_v12i64: ; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: lui a0, %hi(.LCPI46_0) -; RV32-BITS-256-NEXT: addi a0, a0, %lo(.LCPI46_0) -; RV32-BITS-256-NEXT: li a1, 32 -; RV32-BITS-256-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-BITS-256-NEXT: vle16.v v24, (a0) +; RV32-BITS-256-NEXT: li a0, 32 +; RV32-BITS-256-NEXT: lui a1, %hi(.LCPI46_0) +; RV32-BITS-256-NEXT: addi a1, a1, %lo(.LCPI46_0) +; RV32-BITS-256-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-BITS-256-NEXT: vle16.v v24, (a1) ; RV32-BITS-256-NEXT: vrgatherei16.vv v16, v8, v24 ; RV32-BITS-256-NEXT: vmv.v.v v8, v16 ; RV32-BITS-256-NEXT: ret ; ; RV32-BITS-512-LABEL: reverse_v12i64: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: lui a0, %hi(.LCPI46_0) -; RV32-BITS-512-NEXT: addi a0, a0, %lo(.LCPI46_0) -; RV32-BITS-512-NEXT: li a1, 32 -; RV32-BITS-512-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-BITS-512-NEXT: 
vle16.v v24, (a0) +; RV32-BITS-512-NEXT: li a0, 32 +; RV32-BITS-512-NEXT: lui a1, %hi(.LCPI46_0) +; RV32-BITS-512-NEXT: addi a1, a1, %lo(.LCPI46_0) +; RV32-BITS-512-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-BITS-512-NEXT: vle16.v v24, (a1) ; RV32-BITS-512-NEXT: vrgatherei16.vv v16, v8, v24 ; RV32-BITS-512-NEXT: vmv.v.v v8, v16 ; RV32-BITS-512-NEXT: ret @@ -883,11 +883,11 @@ define <12 x i64> @reverse_v12i64(<12 x i64> %a) { ; ; RV32-ZVBB-LABEL: reverse_v12i64: ; RV32-ZVBB: # %bb.0: -; RV32-ZVBB-NEXT: lui a0, %hi(.LCPI46_0) -; RV32-ZVBB-NEXT: addi a0, a0, %lo(.LCPI46_0) -; RV32-ZVBB-NEXT: li a1, 32 -; RV32-ZVBB-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-ZVBB-NEXT: vle16.v v24, (a0) +; RV32-ZVBB-NEXT: li a0, 32 +; RV32-ZVBB-NEXT: lui a1, %hi(.LCPI46_0) +; RV32-ZVBB-NEXT: addi a1, a1, %lo(.LCPI46_0) +; RV32-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-ZVBB-NEXT: vle16.v v24, (a1) ; RV32-ZVBB-NEXT: vrgatherei16.vv v16, v8, v24 ; RV32-ZVBB-NEXT: vmv.v.v v8, v16 ; RV32-ZVBB-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll index 0161ac4bc338d..e2580c132f65e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll @@ -225,11 +225,11 @@ declare <16 x i64> @llvm.experimental.stepvector.v16i64() define <16 x i64> @stepvector_v16i64() { ; RV32-LABEL: stepvector_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI16_0) -; RV32-NEXT: addi a0, a0, %lo(.LCPI16_0) -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vle8.v v16, (a0) +; RV32-NEXT: li a0, 32 +; RV32-NEXT: lui a1, %hi(.LCPI16_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI16_0) +; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-NEXT: vle8.v v16, (a1) ; RV32-NEXT: vsext.vf4 v8, v16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll index 29f8eaba90052..e3c7d02462cc7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll @@ -9,12 +9,11 @@ declare <8 x i7> @llvm.vp.sdiv.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) define <8 x i7> @vdiv_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vdiv_vv_v8i7: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vadd.vv v9, v9, v9 -; CHECK-NEXT: vsra.vi v9, v9, 1 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vdiv.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.sdiv.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll index 3f8eb0ff276b7..03bd85bf5e69e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll @@ -10,10 +10,9 @@ define <8 x i7> @vdivu_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: vdivu_vv_v8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 127 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a1, v0.t +; CHECK-NEXT: vand.vx v8, v8, a1, v0.t ; CHECK-NEXT: vdivu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.udiv.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll index 9789afda9344a..0b0d758ad8ded 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll @@ -9,12 +9,11 @@ declare <8 x i7> @llvm.vp.smax.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) define <8 x i7> @vmax_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmax_vv_v8i7: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vadd.vv v9, v9, v9 -; CHECK-NEXT: vsra.vi v9, v9, 1 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.smax.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll index 36b0a4642b616..98e630a0e59e5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll @@ -10,10 +10,9 @@ define <8 x i7> @vmaxu_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: vmaxu_vv_v8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 127 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a1, v0.t +; CHECK-NEXT: vand.vx v8, v8, a1, v0.t ; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.umax.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll index adb0a30f34d35..a6e3764b37550 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll @@ -9,12 +9,11 @@ declare <8 x i7> @llvm.vp.smin.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) define <8 x i7> @vmin_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmin_vv_v8i7: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vadd.vv v9, v9, v9 -; CHECK-NEXT: vsra.vi v9, v9, 1 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.smin.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll index 671ce82d4ae79..c59b65edd1ec1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll @@ -10,10 +10,9 @@ define <8 x i7> @vminu_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: vminu_vv_v8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 127 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a1, v0.t +; CHECK-NEXT: vand.vx v8, v8, a1, v0.t ; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.umin.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll index 4bbbad5ed0e0e..ff8a63e371c8e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll @@ -9,12 +9,11 @@ declare <8 x i7> @llvm.vp.srem.v8i7(<8 
x i7>, <8 x i7>, <8 x i1>, i32) define <8 x i7> @vrem_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vrem_vv_v8i7: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vadd.vv v9, v9, v9 -; CHECK-NEXT: vsra.vi v9, v9, 1 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vrem.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.srem.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll index ee11307bddc88..b5eec4142c782 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll @@ -10,10 +10,9 @@ define <8 x i7> @vremu_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: vremu_vv_v8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 127 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a1, v0.t +; CHECK-NEXT: vand.vx v8, v8, a1, v0.t ; CHECK-NEXT: vremu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.urem.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll index c4b7c1f2f19f0..16a0fddfa9827 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll @@ -10,9 +10,8 @@ define <8 x i7> @vsll_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroex ; CHECK-LABEL: vsll_vv_v8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 
127 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a1, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.shl.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll index 7ea5b1f0b505a..180fafa9659b1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll @@ -10,11 +10,10 @@ define <8 x i7> @vsra_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroex ; CHECK-LABEL: vsra_vv_v8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 127 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a1, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.ashr.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll index 9f9d4af0cc2f3..22f04803eadd7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll @@ -10,10 +10,9 @@ define <8 x i7> @vsrl_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroex ; CHECK-LABEL: vsrl_vv_v8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 127 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a1, v0.t +; CHECK-NEXT: vand.vx v8, v8, a1, v0.t ; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call 
<8 x i7> @llvm.vp.lshr.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll index f88a9b3081a1a..372937bb5ca5d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll @@ -9,7 +9,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv1f16( @nearbyint_nxv1f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) @@ -17,6 +17,7 @@ define @nearbyint_nxv1f16( %v) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -32,7 +33,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv2f16( @nearbyint_nxv2f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -40,6 +41,7 @@ define @nearbyint_nxv2f16( %v) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu @@ -55,7 +57,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv4f16( @nearbyint_nxv4f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, 
m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -63,6 +65,7 @@ define @nearbyint_nxv4f16( %v) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -78,7 +81,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv8f16( @nearbyint_nxv8f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) @@ -86,6 +89,7 @@ define @nearbyint_nxv8f16( %v) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu @@ -101,7 +105,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv16f16(< define @nearbyint_nxv16f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) @@ -109,6 +113,7 @@ define @nearbyint_nxv16f16( %v) strictf ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu @@ -124,7 +129,7 @@ declare 
@llvm.experimental.constrained.nearbyint.nxv32f16(< define @nearbyint_nxv32f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) @@ -132,6 +137,7 @@ define @nearbyint_nxv32f16( %v) strictf ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu @@ -147,7 +153,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv1f32( @nearbyint_nxv1f32( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -155,6 +161,7 @@ define @nearbyint_nxv1f32( %v) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu @@ -170,7 +177,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv2f32( @nearbyint_nxv2f32( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -178,6 +185,7 @@ define @nearbyint_nxv2f32( %v) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; 
CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu @@ -193,7 +201,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv4f32( @nearbyint_nxv4f32( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 @@ -201,6 +209,7 @@ define @nearbyint_nxv4f32( %v) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -216,7 +225,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv8f32( @nearbyint_nxv8f32( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -224,6 +233,7 @@ define @nearbyint_nxv8f32( %v) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu @@ -239,7 +249,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv16f32( define @nearbyint_nxv16f32( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 @@ -247,6 +257,7 @@ define @nearbyint_nxv16f32( %v) stric ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; 
CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -262,7 +273,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv1f64(< define @nearbyint_nxv1f64( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) @@ -270,6 +281,7 @@ define @nearbyint_nxv1f64( %v) strict ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -285,7 +297,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv2f64(< define @nearbyint_nxv2f64( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) @@ -293,6 +305,7 @@ define @nearbyint_nxv2f64( %v) strict ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -308,7 +321,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv4f64(< define @nearbyint_nxv4f64( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, 
%lo(.LCPI13_0)(a0) @@ -316,6 +329,7 @@ define @nearbyint_nxv4f64( %v) strict ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu @@ -331,7 +345,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv8f64(< define @nearbyint_nxv8f64( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) @@ -339,6 +353,7 @@ define @nearbyint_nxv8f64( %v) strict ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll index 9da4d7ec9f2d0..4aa26d6b79ca4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll @@ -11,22 +11,22 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) { ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: addi a3, a2, 1 -; RV32-NEXT: addi a4, a0, 1 +; RV32-NEXT: vmv.s.x v9, zero +; RV32-NEXT: vsetvli zero, a3, e8, mf2, tu, ma +; RV32-NEXT: vslideup.vx v8, v9, a2 +; RV32-NEXT: addi a2, a0, 1 ; RV32-NEXT: .LBB0_1: # %for.body ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-NEXT: th.lrb a0, a1, a0, 0 -; RV32-NEXT: vmv.s.x v9, zero -; RV32-NEXT: vmv1r.v v10, v8 -; RV32-NEXT: vsetvli zero, a3, e8, mf2, tu, ma -; RV32-NEXT: vslideup.vx v10, v9, a2 -; RV32-NEXT: vsetivli zero, 8, 
e8, mf2, tu, ma -; RV32-NEXT: vmv.s.x v10, a0 -; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32-NEXT: vmseq.vi v9, v10, 0 +; RV32-NEXT: vmv1r.v v9, v8 +; RV32-NEXT: vsetivli zero, 8, e8, m1, tu, ma +; RV32-NEXT: vmv.s.x v9, a0 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vmseq.vi v9, v9, 0 ; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: andi a5, a0, 255 -; RV32-NEXT: mv a0, a4 -; RV32-NEXT: bnez a5, .LBB0_1 +; RV32-NEXT: andi a3, a0, 255 +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bnez a3, .LBB0_1 ; RV32-NEXT: # %bb.2: # %if.then381 ; RV32-NEXT: li a0, 0 ; RV32-NEXT: ret @@ -37,23 +37,23 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) { ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vmv.v.x v8, a3 ; RV64-NEXT: addi a3, a2, 1 -; RV64-NEXT: addi a4, a0, 1 +; RV64-NEXT: vmv.s.x v9, zero +; RV64-NEXT: vsetvli zero, a3, e8, mf2, tu, ma +; RV64-NEXT: vslideup.vx v8, v9, a2 +; RV64-NEXT: addi a2, a0, 1 ; RV64-NEXT: .LBB0_1: # %for.body ; RV64-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64-NEXT: sext.w a0, a0 ; RV64-NEXT: th.lrb a0, a1, a0, 0 -; RV64-NEXT: vmv.s.x v9, zero -; RV64-NEXT: vmv1r.v v10, v8 -; RV64-NEXT: vsetvli zero, a3, e8, mf2, tu, ma -; RV64-NEXT: vslideup.vx v10, v9, a2 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, tu, ma -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV64-NEXT: vmseq.vi v9, v10, 0 +; RV64-NEXT: vmv1r.v v9, v8 +; RV64-NEXT: vsetivli zero, 8, e8, m1, tu, ma +; RV64-NEXT: vmv.s.x v9, a0 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vmseq.vi v9, v9, 0 ; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: andi a5, a0, 255 -; RV64-NEXT: mv a0, a4 -; RV64-NEXT: bnez a5, .LBB0_1 +; RV64-NEXT: andi a3, a0, 255 +; RV64-NEXT: mv a0, a2 +; RV64-NEXT: bnez a3, .LBB0_1 ; RV64-NEXT: # %bb.2: # %if.then381 ; RV64-NEXT: li a0, 0 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll index 
3276f481f30ea..aaa7a538e70fb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll @@ -9,7 +9,7 @@ define @round_nxv1f16( %x) strictfp { ; CHECK-LABEL: round_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) @@ -17,6 +17,7 @@ define @round_nxv1f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -31,7 +32,7 @@ declare @llvm.experimental.constrained.round.nxv1f16( @round_nxv2f16( %x) strictfp { ; CHECK-LABEL: round_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -39,6 +40,7 @@ define @round_nxv2f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -53,7 +55,7 @@ declare @llvm.experimental.constrained.round.nxv2f16( @round_nxv4f16( %x) strictfp { ; CHECK-LABEL: round_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -61,6 +63,7 @@ define @round_nxv4f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; 
CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -75,7 +78,7 @@ declare @llvm.experimental.constrained.round.nxv4f16( @round_nxv8f16( %x) strictfp { ; CHECK-LABEL: round_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) @@ -83,6 +86,7 @@ define @round_nxv8f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -97,7 +101,7 @@ declare @llvm.experimental.constrained.round.nxv8f16( @round_nxv16f16( %x) strictfp { ; CHECK-LABEL: round_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) @@ -105,6 +109,7 @@ define @round_nxv16f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -119,7 +124,7 @@ declare @llvm.experimental.constrained.round.nxv16f16( @round_nxv32f16( %x) strictfp { ; CHECK-LABEL: round_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) @@ -127,6 +132,7 @@ define @round_nxv32f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; 
CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -141,7 +147,7 @@ declare @llvm.experimental.constrained.round.nxv32f16( @round_nxv1f32( %x) strictfp { ; CHECK-LABEL: round_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -149,6 +155,7 @@ define @round_nxv1f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -163,7 +170,7 @@ declare @llvm.experimental.constrained.round.nxv1f32( @round_nxv2f32( %x) strictfp { ; CHECK-LABEL: round_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -171,6 +178,7 @@ define @round_nxv2f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -185,7 +193,7 @@ declare @llvm.experimental.constrained.round.nxv2f32( @round_nxv4f32( %x) strictfp { ; CHECK-LABEL: round_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 @@ -193,6 +201,7 @@ define @round_nxv4f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, 
v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -207,7 +216,7 @@ declare @llvm.experimental.constrained.round.nxv4f32( @round_nxv8f32( %x) strictfp { ; CHECK-LABEL: round_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -215,6 +224,7 @@ define @round_nxv8f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -229,7 +239,7 @@ declare @llvm.experimental.constrained.round.nxv8f32( @round_nxv16f32( %x) strictfp { ; CHECK-LABEL: round_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 @@ -237,6 +247,7 @@ define @round_nxv16f32( %x) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -251,7 +262,7 @@ declare @llvm.experimental.constrained.round.nxv16f32( @round_nxv1f64( %x) strictfp { ; CHECK-LABEL: round_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) @@ -259,6 +270,7 @@ define @round_nxv1f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; 
CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -273,7 +285,7 @@ declare @llvm.experimental.constrained.round.nxv1f64( @round_nxv2f64( %x) strictfp { ; CHECK-LABEL: round_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) @@ -281,6 +293,7 @@ define @round_nxv2f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -295,7 +308,7 @@ declare @llvm.experimental.constrained.round.nxv2f64( @round_nxv4f64( %x) strictfp { ; CHECK-LABEL: round_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) @@ -303,6 +316,7 @@ define @round_nxv4f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -317,7 +331,7 @@ declare @llvm.experimental.constrained.round.nxv4f64( @round_nxv8f64( %x) strictfp { ; CHECK-LABEL: round_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) @@ -325,6 +339,7 @@ define @round_nxv8f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t 
; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll index 4ebfcccbaaa6e..cdc01d658778b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll @@ -9,7 +9,7 @@ define @roundeven_nxv1f16( %x) strictfp { ; CHECK-LABEL: roundeven_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) @@ -17,6 +17,7 @@ define @roundeven_nxv1f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -31,7 +32,7 @@ declare @llvm.experimental.constrained.roundeven.nxv1f16( @roundeven_nxv2f16( %x) strictfp { ; CHECK-LABEL: roundeven_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -39,6 +40,7 @@ define @roundeven_nxv2f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -53,7 +55,7 @@ declare @llvm.experimental.constrained.roundeven.nxv2f16( @roundeven_nxv4f16( %x) strictfp { ; CHECK-LABEL: roundeven_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, 
%hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -61,6 +63,7 @@ define @roundeven_nxv4f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -75,7 +78,7 @@ declare @llvm.experimental.constrained.roundeven.nxv4f16( @roundeven_nxv8f16( %x) strictfp { ; CHECK-LABEL: roundeven_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) @@ -83,6 +86,7 @@ define @roundeven_nxv8f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -97,7 +101,7 @@ declare @llvm.experimental.constrained.roundeven.nxv8f16( @roundeven_nxv16f16( %x) strictfp { ; CHECK-LABEL: roundeven_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) @@ -105,6 +109,7 @@ define @roundeven_nxv16f16( %x) strictf ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -119,7 +124,7 @@ declare @llvm.experimental.constrained.roundeven.nxv16f16(< define @roundeven_nxv32f16( %x) strictfp { ; CHECK-LABEL: roundeven_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; 
CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) @@ -127,6 +132,7 @@ define @roundeven_nxv32f16( %x) strictf ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -141,7 +147,7 @@ declare @llvm.experimental.constrained.roundeven.nxv32f16(< define @roundeven_nxv1f32( %x) strictfp { ; CHECK-LABEL: roundeven_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -149,6 +155,7 @@ define @roundeven_nxv1f32( %x) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -163,7 +170,7 @@ declare @llvm.experimental.constrained.roundeven.nxv1f32( @roundeven_nxv2f32( %x) strictfp { ; CHECK-LABEL: roundeven_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -171,6 +178,7 @@ define @roundeven_nxv2f32( %x) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -185,7 +193,7 @@ declare @llvm.experimental.constrained.roundeven.nxv2f32( @roundeven_nxv4f32( %x) strictfp { ; CHECK-LABEL: roundeven_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli 
a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 @@ -193,6 +201,7 @@ define @roundeven_nxv4f32( %x) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -207,7 +216,7 @@ declare @llvm.experimental.constrained.roundeven.nxv4f32( @roundeven_nxv8f32( %x) strictfp { ; CHECK-LABEL: roundeven_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -215,6 +224,7 @@ define @roundeven_nxv8f32( %x) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -229,7 +239,7 @@ declare @llvm.experimental.constrained.roundeven.nxv8f32( @roundeven_nxv16f32( %x) strictfp { ; CHECK-LABEL: roundeven_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 @@ -237,6 +247,7 @@ define @roundeven_nxv16f32( %x) stric ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -251,7 +262,7 @@ declare @llvm.experimental.constrained.roundeven.nxv16f32( define @roundeven_nxv1f64( %x) strictfp { ; CHECK-LABEL: roundeven_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, 
ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) @@ -259,6 +270,7 @@ define @roundeven_nxv1f64( %x) strict ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -273,7 +285,7 @@ declare @llvm.experimental.constrained.roundeven.nxv1f64(< define @roundeven_nxv2f64( %x) strictfp { ; CHECK-LABEL: roundeven_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) @@ -281,6 +293,7 @@ define @roundeven_nxv2f64( %x) strict ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -295,7 +308,7 @@ declare @llvm.experimental.constrained.roundeven.nxv2f64(< define @roundeven_nxv4f64( %x) strictfp { ; CHECK-LABEL: roundeven_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) @@ -303,6 +316,7 @@ define @roundeven_nxv4f64( %x) strict ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -317,7 +331,7 @@ declare @llvm.experimental.constrained.roundeven.nxv4f64(< define @roundeven_nxv8f64( %x) strictfp { ; CHECK-LABEL: roundeven_nxv8f64: 
; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) @@ -325,6 +339,7 @@ define @roundeven_nxv8f64( %x) strict ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll index bc5617957d7d0..2c5a3dfffc2cf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll @@ -1282,18 +1282,17 @@ define @fshr_v1i9( %a, %b, ; CHECK-LABEL: fshr_v1i9: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 511 -; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; CHECK-NEXT: vand.vx v10, v10, a1 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vsll.vi v9, v9, 7, v0.t +; CHECK-NEXT: vand.vx v10, v10, a1, v0.t ; CHECK-NEXT: li a0, 9 ; CHECK-NEXT: vremu.vx v10, v10, a0, v0.t ; CHECK-NEXT: vadd.vi v10, v10, 7, v0.t ; CHECK-NEXT: vand.vi v11, v10, 15, v0.t +; CHECK-NEXT: vsll.vi v9, v9, 7, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v10, v10, v0.t ; CHECK-NEXT: vand.vi v10, v10, 15, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -1306,18 +1305,17 @@ define @fshl_v1i9( %a, %b, ; CHECK-LABEL: fshl_v1i9: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 511 -; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; CHECK-NEXT: vand.vx v10, v10, a1 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vsll.vi v9, v9, 7, v0.t -; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t +; CHECK-NEXT: vand.vx v10, v10, a1, v0.t ; CHECK-NEXT: li a0, 9 ; 
CHECK-NEXT: vremu.vx v10, v10, a0, v0.t -; CHECK-NEXT: vnot.v v11, v10, v0.t -; CHECK-NEXT: vand.vi v11, v11, 15, v0.t -; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t +; CHECK-NEXT: vand.vi v11, v10, 15, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t +; CHECK-NEXT: vnot.v v10, v10, v0.t ; CHECK-NEXT: vand.vi v10, v10, 15, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsll.vi v9, v9, 7, v0.t +; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %res = call @llvm.vp.fshl.nxv1i9( %a, %b, %c, %m, i32 %evl) @@ -1330,15 +1328,14 @@ declare @llvm.vp.fshr.nxv1i4(, @fshr_v1i4( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: fshr_v1i4: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-NEXT: vand.vi v10, v10, 15 -; CHECK-NEXT: li a1, 4 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vremu.vx v10, v10, a1, v0.t +; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vand.vi v9, v9, 15, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vv v8, v8, v10, v0.t +; CHECK-NEXT: li a0, 4 +; CHECK-NEXT: vremu.vx v9, v10, a0, v0.t +; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret %trunca = call @llvm.vp.trunc.nxv1i4.nxv1i8( %a, %m, i32 zeroext %evl) @@ -1353,15 +1350,14 @@ declare @llvm.vp.fshl.nxv1i4(, @fshl_v1i4( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: fshl_v1i4: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-NEXT: vand.vi v10, v10, 15 -; CHECK-NEXT: li a1, 4 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vremu.vx v10, v10, a1, v0.t +; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vand.vi v9, v9, 15, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t +; CHECK-NEXT: li a0, 4 +; CHECK-NEXT: vremu.vx v9, v10, a0, v0.t +; 
CHECK-NEXT: vsll.vv v8, v8, v9, v0.t ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll index 3665669d83a3d..21615b516da89 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll @@ -7,13 +7,14 @@ define @trunc_nxv1f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -27,13 +28,14 @@ declare @llvm.experimental.constrained.trunc.nxv1f16( @trunc_nxv2f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu @@ -47,13 +49,14 @@ declare @llvm.experimental.constrained.trunc.nxv2f16( @trunc_nxv4f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, 
%hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -67,13 +70,14 @@ declare @llvm.experimental.constrained.trunc.nxv4f16( @trunc_nxv8f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu @@ -87,13 +91,14 @@ declare @llvm.experimental.constrained.trunc.nxv8f16( @trunc_nxv16f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu @@ -107,13 +112,14 @@ declare @llvm.experimental.constrained.trunc.nxv16f16( @trunc_nxv32f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh 
fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu @@ -127,13 +133,14 @@ declare @llvm.experimental.constrained.trunc.nxv32f16( @trunc_nxv1f32( %x) strictfp { ; CHECK-LABEL: trunc_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu @@ -147,13 +154,14 @@ declare @llvm.experimental.constrained.trunc.nxv1f32( @trunc_nxv2f32( %x) strictfp { ; CHECK-LABEL: trunc_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu @@ -167,13 +175,14 @@ declare @llvm.experimental.constrained.trunc.nxv2f32( @trunc_nxv4f32( %x) strictfp { ; CHECK-LABEL: trunc_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, 
a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -187,13 +196,14 @@ declare @llvm.experimental.constrained.trunc.nxv4f32( @trunc_nxv8f32( %x) strictfp { ; CHECK-LABEL: trunc_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu @@ -207,13 +217,14 @@ declare @llvm.experimental.constrained.trunc.nxv8f32( @trunc_nxv16f32( %x) strictfp { ; CHECK-LABEL: trunc_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -227,13 +238,14 @@ declare @llvm.experimental.constrained.trunc.nxv16f32( @trunc_nxv1f64( %x) strictfp { ; CHECK-LABEL: trunc_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli 
zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -247,13 +259,14 @@ declare @llvm.experimental.constrained.trunc.nxv1f64( @trunc_nxv2f64( %x) strictfp { ; CHECK-LABEL: trunc_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -267,13 +280,14 @@ declare @llvm.experimental.constrained.trunc.nxv2f64( @trunc_nxv4f64( %x) strictfp { ; CHECK-LABEL: trunc_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu @@ -287,13 +301,14 @@ declare @llvm.experimental.constrained.trunc.nxv4f64( @trunc_nxv8f64( %x) strictfp { ; CHECK-LABEL: trunc_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma 
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/mul-combine.ll b/llvm/test/CodeGen/RISCV/rvv/mul-combine.ll new file mode 100644 index 0000000000000..6a7da925b4d43 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/mul-combine.ll @@ -0,0 +1,117 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-RV64 + +define <2 x i16> @test_v2i16(<2 x i16> %x) { +; CHECK-RV32-LABEL: test_v2i16: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-RV32-NEXT: vsra.vi v8, v8, 7 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: test_v2i16: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-RV64-NEXT: vsra.vi v8, v8, 7 +; CHECK-RV64-NEXT: ret + %1 = lshr <2 x i16> %x, + %2 = and <2 x i16> %1, + %3 = mul <2 x i16> %2, + ret <2 x i16> %3 +} + +define @test_nxv2i16( %x) { +; CHECK-RV32-LABEL: test_nxv2i16: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-RV32-NEXT: vsrl.vi v8, v8, 7 +; CHECK-RV32-NEXT: li a0, 257 +; CHECK-RV32-NEXT: vand.vx v8, v8, a0 +; CHECK-RV32-NEXT: vsll.vi v8, v8, 8 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: test_nxv2i16: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-RV64-NEXT: vsrl.vi v8, v8, 7 +; CHECK-RV64-NEXT: li a0, 257 +; CHECK-RV64-NEXT: vand.vx v8, v8, a0 +; CHECK-RV64-NEXT: vsll.vi v8, v8, 8 +; CHECK-RV64-NEXT: ret + %1 = lshr %x, splat (i16 7) + %2 = and %1, splat (i16 257) + %3 = mul %2, splat (i16 256) + ret %3 +} + +define <2 x i32> @test_v2i32(<2 x i32> %x) { +; CHECK-RV32-LABEL: test_v2i32: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: 
vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-RV32-NEXT: vsra.vi v8, v8, 15 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: test_v2i32: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-RV64-NEXT: vsra.vi v8, v8, 15 +; CHECK-RV64-NEXT: ret + %1 = lshr <2 x i32> %x, + %2 = and <2 x i32> %1, + %3 = mul <2 x i32> %2, + ret <2 x i32> %3 +} + +define @test_nxv2i32( %x) { +; CHECK-RV32-LABEL: test_nxv2i32: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-RV32-NEXT: vsra.vi v8, v8, 15 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: test_nxv2i32: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-RV64-NEXT: vsra.vi v8, v8, 15 +; CHECK-RV64-NEXT: ret + %1 = lshr %x, splat (i32 15) + %2 = and %1, splat (i32 65537) + %3 = mul %2, splat (i32 65535) + ret %3 +} + +define <2 x i64> @test_v2i64(<2 x i64> %x) { +; CHECK-RV32-LABEL: test_v2i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-RV32-NEXT: vsra.vi v8, v8, 31 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: test_v2i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-RV64-NEXT: vsra.vi v8, v8, 31 +; CHECK-RV64-NEXT: ret + %1 = lshr <2 x i64> %x, + %2 = and <2 x i64> %1, + %3 = mul <2 x i64> %2, + ret <2 x i64> %3 +} + +define @test_nxv2i64( %x) { +; CHECK-RV32-LABEL: test_nxv2i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-RV32-NEXT: vsra.vi v8, v8, 31 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: test_nxv2i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-RV64-NEXT: vsra.vi v8, v8, 31 +; CHECK-RV64-NEXT: ret + %1 = lshr %x, splat (i64 31) + %2 = and %1, splat (i64 4294967297) + %3 = mul %2, splat (i64 4294967295) + ret %3 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll index 
6e327457bebff..368f454fa5fda 100644 --- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll @@ -106,11 +106,11 @@ define <16 x i8> @v16i8(<16 x i8> %a) { define <32 x i8> @v16i8_2(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: v16i8_2: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI7_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI7_0) -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: lui a1, %hi(.LCPI7_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI7_0) +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v12, (a1) ; CHECK-NEXT: vmv1r.v v14, v9 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vid.v v8 @@ -230,11 +230,11 @@ define <16 x i16> @v16i16(<16 x i16> %a) { define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: v16i16_2: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI15_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI15_0) -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: lui a1, %hi(.LCPI15_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI15_0) +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vle16.v v16, (a1) ; CHECK-NEXT: vmv2r.v v20, v10 ; CHECK-NEXT: vmv2r.v v12, v8 ; CHECK-NEXT: vrgather.vv v8, v12, v16 @@ -363,11 +363,11 @@ define <16 x i32> @v16i32(<16 x i32> %a) { define <32 x i32> @v16i32_2(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: v16i32_2: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI23_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI23_0) -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vle16.v v20, (a0) +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: lui a1, %hi(.LCPI23_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI23_0) +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vle16.v v20, (a1) ; CHECK-NEXT: vmv4r.v v24, v12 ; CHECK-NEXT: 
vmv4r.v v16, v8 ; CHECK-NEXT: vrgatherei16.vv v8, v16, v20 @@ -548,11 +548,11 @@ define <16 x half> @v16f16(<16 x half> %a) { define <32 x half> @v16f16_2(<16 x half> %a) { ; CHECK-LABEL: v16f16_2: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI35_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI35_0) -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: lui a1, %hi(.LCPI35_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI35_0) +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vle16.v v16, (a1) ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret @@ -719,11 +719,11 @@ define <8 x double> @v4f64_2(<4 x double> %a, <4 x double> %b) { define <32 x i8> @v32i8(<32 x i8> %a) { ; CHECK-LABEL: v32i8: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI46_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0) -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: lui a1, %hi(.LCPI46_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI46_0) +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v12, (a1) ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll index 26089706cf99e..a4b7ca7f39768 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll @@ -9,11 +9,15 @@ declare @llvm.vp.sdiv.nxv8i7(, @vdiv_vx_nxv8i7( %a, i7 signext %b, %mask, i32 zeroext %evl) { ; CHECK-LABEL: vdiv_vx_nxv8i7: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, 
m1, ta, ma -; CHECK-NEXT: vdiv.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t +; CHECK-NEXT: vdiv.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll index f41b885a66eaa..67c3f9dbf2869 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll @@ -10,11 +10,12 @@ define @vdivu_vx_nxv8i7( %a, i7 signext %b, < ; CHECK-LABEL: vdivu_vx_nxv8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 127 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v8, v8, a2, v0.t ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a2 ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vand.vx v9, v9, a2 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a2, v0.t ; CHECK-NEXT: vdivu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll index 8a76467986620..c15caa31bb098 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll @@ -9,11 +9,15 @@ declare @llvm.vp.smax.nxv8i7(, @vmax_vx_nxv8i7( %a, i7 signext %b, %mask, i32 zeroext %evl) { ; CHECK-LABEL: vmax_vx_nxv8i7: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 
%b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll index 1c74887c1b20f..df494f8af7387 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll @@ -10,11 +10,12 @@ define @vmaxu_vx_nxv8i7( %a, i7 signext %b, < ; CHECK-LABEL: vmaxu_vx_nxv8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 127 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v8, v8, a2, v0.t ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a2 ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vand.vx v9, v9, a2 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a2, v0.t ; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll index 1c71242c3c7d7..794a21c7c6aba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll @@ -9,11 +9,15 @@ declare @llvm.vp.smin.nxv8i7(, @vmin_vx_nxv8i7( %a, i7 signext %b, %mask, i32 zeroext %evl) { ; CHECK-LABEL: vmin_vx_nxv8i7: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll index 6d89a9777cf91..d54de281a7fd2 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll @@ -10,11 +10,12 @@ define @vminu_vx_nxv8i7( %a, i7 signext %b, < ; CHECK-LABEL: vminu_vx_nxv8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 127 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v8, v8, a2, v0.t ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a2 ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vand.vx v9, v9, a2 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a2, v0.t ; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll index cf85fd827b51f..2ef96f4b3896f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll @@ -9,11 +9,15 @@ declare @llvm.vp.srem.nxv8i7(, @vrem_vx_nxv8i7( %a, i7 signext %b, %mask, i32 zeroext %evl) { ; CHECK-LABEL: vrem_vx_nxv8i7: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vrem.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t +; CHECK-NEXT: vrem.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll index 61bdd5b8d3c8a..1f1ed4a1269ac 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll @@ -10,11 +10,12 @@ define @vremu_vx_nxv8i7( %a, i7 signext %b, < ; CHECK-LABEL: vremu_vx_nxv8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 
127 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v8, v8, a2, v0.t ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a2 ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vand.vx v9, v9, a2 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a2, v0.t ; CHECK-NEXT: vremu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll index c04d5ea2da3c1..380835494ed17 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll @@ -12,8 +12,8 @@ define @vsll_vx_nxv8i7( %a, i7 signext %b, poison, i7 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll index 7bae84142d8ae..8dbb57fd15cf1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll @@ -934,3 +934,22 @@ define @vsra_vi_mask_nxv8i32( %va, %va, %vs ret %vc } + +; Negative test. We shouldn't look through the vp.trunc as it isn't vlmax like +; the rest of the code. 
+define @vsra_vv_nxv1i8_sext_zext_mixed_trunc( %va, %vb, %m, i32 %evl) { +; CHECK-LABEL: vsra_vv_nxv1i8_sext_zext_mixed_trunc: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 7 +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmin.vx v9, v8, a0 +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: ret + %sexted_va = sext %va to + %zexted_vb = zext %va to + %expand = ashr %sexted_va, %zexted_vb + %vc = trunc %expand to + %vd = call @llvm.vp.trunc.nxv1i8.nxvi16( %vc, %m, i32 %evl) + ret %vd +} +declare @llvm.vp.trunc.nxv1i8.nxvi16(, , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll index 632c4db5c5bb5..cff8cc710d21f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll @@ -9,13 +9,14 @@ declare @llvm.vp.ashr.nxv8i7(, @vsra_vx_nxv8i7( %a, i7 signext %b, %mask, i32 zeroext %evl) { ; CHECK-LABEL: vsra_vx_nxv8i7: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll index ec5b7f3faf7ca..ff6771b643031 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll @@ -10,11 +10,12 @@ define @vsrl_vx_nxv8i7( %a, i7 signext %b, poison, i7 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/shl-cttz.ll b/llvm/test/CodeGen/RISCV/shl-cttz.ll new file mode 100644 index 0000000000000..0eeb8b04c7e5d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/shl-cttz.ll @@ -0,0 +1,807 @@ +; NOTE: Assertions 
have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=riscv32 -mattr=+m < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -mattr=+m,+zbb < %s \ +; RUN: | FileCheck %s -check-prefix=RV32ZBB +; RUN: llc -mtriple=riscv64 -mattr=+m < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64I,RV64IILLEGALI32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+zbb < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64ZBB,RV64ZBBILLEGALI32 +; RUN: llc -mtriple=riscv64 -mattr=+m -riscv-experimental-rv64-legal-i32 < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64I,RV64ILEGALI32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+zbb -riscv-experimental-rv64-legal-i32 < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64ZBB,RV64ZBBLEGALI32 + +define i8 @shl_cttz_i8(i8 %x, i8 %y) { +; RV32I-LABEL: shl_cttz_i8: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi a2, a1, -1 +; RV32I-NEXT: not a1, a1 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: srli a2, a1, 1 +; RV32I-NEXT: andi a2, a2, 85 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: andi a2, a1, 51 +; RV32I-NEXT: srli a1, a1, 2 +; RV32I-NEXT: andi a1, a1, 51 +; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: srli a2, a1, 4 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: andi a1, a1, 15 +; RV32I-NEXT: sll a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: shl_cttz_i8: +; RV32ZBB: # %bb.0: # %entry +; RV32ZBB-NEXT: ctz a1, a1 +; RV32ZBB-NEXT: sll a0, a0, a1 +; RV32ZBB-NEXT: ret +; +; RV64IILLEGALI32-LABEL: shl_cttz_i8: +; RV64IILLEGALI32: # %bb.0: # %entry +; RV64IILLEGALI32-NEXT: addi a2, a1, -1 +; RV64IILLEGALI32-NEXT: not a1, a1 +; RV64IILLEGALI32-NEXT: and a1, a1, a2 +; RV64IILLEGALI32-NEXT: srli a2, a1, 1 +; RV64IILLEGALI32-NEXT: andi a2, a2, 85 +; RV64IILLEGALI32-NEXT: subw a1, a1, a2 +; RV64IILLEGALI32-NEXT: andi a2, a1, 51 +; RV64IILLEGALI32-NEXT: srli a1, a1, 2 +; RV64IILLEGALI32-NEXT: andi a1, a1, 51 +; RV64IILLEGALI32-NEXT: add a1, a2, a1 +; RV64IILLEGALI32-NEXT: srli a2, a1, 4 +; 
RV64IILLEGALI32-NEXT: add a1, a1, a2 +; RV64IILLEGALI32-NEXT: andi a1, a1, 15 +; RV64IILLEGALI32-NEXT: sll a0, a0, a1 +; RV64IILLEGALI32-NEXT: ret +; +; RV64ZBBILLEGALI32-LABEL: shl_cttz_i8: +; RV64ZBBILLEGALI32: # %bb.0: # %entry +; RV64ZBBILLEGALI32-NEXT: ctz a1, a1 +; RV64ZBBILLEGALI32-NEXT: sll a0, a0, a1 +; RV64ZBBILLEGALI32-NEXT: ret +; +; RV64ILEGALI32-LABEL: shl_cttz_i8: +; RV64ILEGALI32: # %bb.0: # %entry +; RV64ILEGALI32-NEXT: addi a2, a1, -1 +; RV64ILEGALI32-NEXT: not a1, a1 +; RV64ILEGALI32-NEXT: and a1, a1, a2 +; RV64ILEGALI32-NEXT: srliw a2, a1, 1 +; RV64ILEGALI32-NEXT: andi a2, a2, 85 +; RV64ILEGALI32-NEXT: subw a1, a1, a2 +; RV64ILEGALI32-NEXT: andi a2, a1, 51 +; RV64ILEGALI32-NEXT: srliw a1, a1, 2 +; RV64ILEGALI32-NEXT: andi a1, a1, 51 +; RV64ILEGALI32-NEXT: add a1, a2, a1 +; RV64ILEGALI32-NEXT: srliw a2, a1, 4 +; RV64ILEGALI32-NEXT: add a1, a1, a2 +; RV64ILEGALI32-NEXT: andi a1, a1, 15 +; RV64ILEGALI32-NEXT: sllw a0, a0, a1 +; RV64ILEGALI32-NEXT: ret +; +; RV64ZBBLEGALI32-LABEL: shl_cttz_i8: +; RV64ZBBLEGALI32: # %bb.0: # %entry +; RV64ZBBLEGALI32-NEXT: ctzw a1, a1 +; RV64ZBBLEGALI32-NEXT: sllw a0, a0, a1 +; RV64ZBBLEGALI32-NEXT: ret +entry: + %cttz = call i8 @llvm.cttz.i8(i8 %y, i1 true) + %res = shl i8 %x, %cttz + ret i8 %res +} + +define i8 @shl_cttz_constant_i8(i8 %y) { +; RV32I-LABEL: shl_cttz_constant_i8: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi a1, a0, -1 +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: andi a1, a1, 85 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: andi a1, a0, 51 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: andi a0, a0, 51 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: andi a0, a0, 15 +; RV32I-NEXT: li a1, 4 +; RV32I-NEXT: sll a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: shl_cttz_constant_i8: +; RV32ZBB: # %bb.0: # %entry +; RV32ZBB-NEXT: ctz a0, a0 +; RV32ZBB-NEXT: li a1, 4 +; RV32ZBB-NEXT: sll a0, a1, 
a0 +; RV32ZBB-NEXT: ret +; +; RV64IILLEGALI32-LABEL: shl_cttz_constant_i8: +; RV64IILLEGALI32: # %bb.0: # %entry +; RV64IILLEGALI32-NEXT: addi a1, a0, -1 +; RV64IILLEGALI32-NEXT: not a0, a0 +; RV64IILLEGALI32-NEXT: and a0, a0, a1 +; RV64IILLEGALI32-NEXT: srli a1, a0, 1 +; RV64IILLEGALI32-NEXT: andi a1, a1, 85 +; RV64IILLEGALI32-NEXT: subw a0, a0, a1 +; RV64IILLEGALI32-NEXT: andi a1, a0, 51 +; RV64IILLEGALI32-NEXT: srli a0, a0, 2 +; RV64IILLEGALI32-NEXT: andi a0, a0, 51 +; RV64IILLEGALI32-NEXT: add a0, a1, a0 +; RV64IILLEGALI32-NEXT: srli a1, a0, 4 +; RV64IILLEGALI32-NEXT: add a0, a0, a1 +; RV64IILLEGALI32-NEXT: andi a0, a0, 15 +; RV64IILLEGALI32-NEXT: li a1, 4 +; RV64IILLEGALI32-NEXT: sll a0, a1, a0 +; RV64IILLEGALI32-NEXT: ret +; +; RV64ZBBILLEGALI32-LABEL: shl_cttz_constant_i8: +; RV64ZBBILLEGALI32: # %bb.0: # %entry +; RV64ZBBILLEGALI32-NEXT: ctz a0, a0 +; RV64ZBBILLEGALI32-NEXT: li a1, 4 +; RV64ZBBILLEGALI32-NEXT: sll a0, a1, a0 +; RV64ZBBILLEGALI32-NEXT: ret +; +; RV64ILEGALI32-LABEL: shl_cttz_constant_i8: +; RV64ILEGALI32: # %bb.0: # %entry +; RV64ILEGALI32-NEXT: addi a1, a0, -1 +; RV64ILEGALI32-NEXT: not a0, a0 +; RV64ILEGALI32-NEXT: and a0, a0, a1 +; RV64ILEGALI32-NEXT: srliw a1, a0, 1 +; RV64ILEGALI32-NEXT: andi a1, a1, 85 +; RV64ILEGALI32-NEXT: subw a0, a0, a1 +; RV64ILEGALI32-NEXT: andi a1, a0, 51 +; RV64ILEGALI32-NEXT: srliw a0, a0, 2 +; RV64ILEGALI32-NEXT: andi a0, a0, 51 +; RV64ILEGALI32-NEXT: add a0, a1, a0 +; RV64ILEGALI32-NEXT: srliw a1, a0, 4 +; RV64ILEGALI32-NEXT: add a0, a0, a1 +; RV64ILEGALI32-NEXT: andi a0, a0, 15 +; RV64ILEGALI32-NEXT: li a1, 4 +; RV64ILEGALI32-NEXT: sllw a0, a1, a0 +; RV64ILEGALI32-NEXT: ret +; +; RV64ZBBLEGALI32-LABEL: shl_cttz_constant_i8: +; RV64ZBBLEGALI32: # %bb.0: # %entry +; RV64ZBBLEGALI32-NEXT: ctzw a0, a0 +; RV64ZBBLEGALI32-NEXT: li a1, 4 +; RV64ZBBLEGALI32-NEXT: sllw a0, a1, a0 +; RV64ZBBLEGALI32-NEXT: ret +entry: + %cttz = call i8 @llvm.cttz.i8(i8 %y, i1 true) + %res = shl i8 4, %cttz + ret i8 %res +} + +define 
i16 @shl_cttz_i16(i16 %x, i16 %y) { +; RV32I-LABEL: shl_cttz_i16: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi a2, a1, -1 +; RV32I-NEXT: not a1, a1 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: srli a2, a1, 1 +; RV32I-NEXT: lui a3, 5 +; RV32I-NEXT: addi a3, a3, 1365 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: lui a2, 3 +; RV32I-NEXT: addi a2, a2, 819 +; RV32I-NEXT: and a3, a1, a2 +; RV32I-NEXT: srli a1, a1, 2 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: srli a2, a1, 4 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: andi a2, a1, 15 +; RV32I-NEXT: slli a1, a1, 20 +; RV32I-NEXT: srli a1, a1, 28 +; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: sll a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: shl_cttz_i16: +; RV32ZBB: # %bb.0: # %entry +; RV32ZBB-NEXT: ctz a1, a1 +; RV32ZBB-NEXT: sll a0, a0, a1 +; RV32ZBB-NEXT: ret +; +; RV64IILLEGALI32-LABEL: shl_cttz_i16: +; RV64IILLEGALI32: # %bb.0: # %entry +; RV64IILLEGALI32-NEXT: addi a2, a1, -1 +; RV64IILLEGALI32-NEXT: not a1, a1 +; RV64IILLEGALI32-NEXT: and a1, a1, a2 +; RV64IILLEGALI32-NEXT: srli a2, a1, 1 +; RV64IILLEGALI32-NEXT: lui a3, 5 +; RV64IILLEGALI32-NEXT: addiw a3, a3, 1365 +; RV64IILLEGALI32-NEXT: and a2, a2, a3 +; RV64IILLEGALI32-NEXT: sub a1, a1, a2 +; RV64IILLEGALI32-NEXT: lui a2, 3 +; RV64IILLEGALI32-NEXT: addiw a2, a2, 819 +; RV64IILLEGALI32-NEXT: and a3, a1, a2 +; RV64IILLEGALI32-NEXT: srli a1, a1, 2 +; RV64IILLEGALI32-NEXT: and a1, a1, a2 +; RV64IILLEGALI32-NEXT: add a1, a3, a1 +; RV64IILLEGALI32-NEXT: srli a2, a1, 4 +; RV64IILLEGALI32-NEXT: add a1, a1, a2 +; RV64IILLEGALI32-NEXT: andi a2, a1, 15 +; RV64IILLEGALI32-NEXT: slli a1, a1, 52 +; RV64IILLEGALI32-NEXT: srli a1, a1, 60 +; RV64IILLEGALI32-NEXT: add a1, a2, a1 +; RV64IILLEGALI32-NEXT: sll a0, a0, a1 +; RV64IILLEGALI32-NEXT: ret +; +; RV64ZBBILLEGALI32-LABEL: shl_cttz_i16: +; RV64ZBBILLEGALI32: # %bb.0: # %entry +; RV64ZBBILLEGALI32-NEXT: ctz a1, a1 +; RV64ZBBILLEGALI32-NEXT: sll a0, a0, a1 +; 
RV64ZBBILLEGALI32-NEXT: ret +; +; RV64ILEGALI32-LABEL: shl_cttz_i16: +; RV64ILEGALI32: # %bb.0: # %entry +; RV64ILEGALI32-NEXT: addi a2, a1, -1 +; RV64ILEGALI32-NEXT: not a1, a1 +; RV64ILEGALI32-NEXT: and a1, a1, a2 +; RV64ILEGALI32-NEXT: srliw a2, a1, 1 +; RV64ILEGALI32-NEXT: lui a3, 5 +; RV64ILEGALI32-NEXT: addi a3, a3, 1365 +; RV64ILEGALI32-NEXT: and a2, a2, a3 +; RV64ILEGALI32-NEXT: subw a1, a1, a2 +; RV64ILEGALI32-NEXT: lui a2, 3 +; RV64ILEGALI32-NEXT: addi a2, a2, 819 +; RV64ILEGALI32-NEXT: and a3, a1, a2 +; RV64ILEGALI32-NEXT: srliw a1, a1, 2 +; RV64ILEGALI32-NEXT: and a1, a1, a2 +; RV64ILEGALI32-NEXT: add a1, a3, a1 +; RV64ILEGALI32-NEXT: srliw a2, a1, 4 +; RV64ILEGALI32-NEXT: add a1, a1, a2 +; RV64ILEGALI32-NEXT: andi a2, a1, 15 +; RV64ILEGALI32-NEXT: slli a1, a1, 52 +; RV64ILEGALI32-NEXT: srli a1, a1, 60 +; RV64ILEGALI32-NEXT: add a1, a2, a1 +; RV64ILEGALI32-NEXT: sllw a0, a0, a1 +; RV64ILEGALI32-NEXT: ret +; +; RV64ZBBLEGALI32-LABEL: shl_cttz_i16: +; RV64ZBBLEGALI32: # %bb.0: # %entry +; RV64ZBBLEGALI32-NEXT: ctzw a1, a1 +; RV64ZBBLEGALI32-NEXT: sllw a0, a0, a1 +; RV64ZBBLEGALI32-NEXT: ret +entry: + %cttz = call i16 @llvm.cttz.i16(i16 %y, i1 true) + %res = shl i16 %x, %cttz + ret i16 %res +} + +define i16 @shl_cttz_constant_i16(i16 %y) { +; RV32I-LABEL: shl_cttz_constant_i16: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi a1, a0, -1 +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 5 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: lui a1, 3 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: and a2, a0, a1 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: andi a1, a0, 15 +; RV32I-NEXT: slli a0, a0, 20 +; RV32I-NEXT: srli a0, a0, 28 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: li a1, 4 +; RV32I-NEXT: sll a0, a1, a0 +; RV32I-NEXT: 
ret +; +; RV32ZBB-LABEL: shl_cttz_constant_i16: +; RV32ZBB: # %bb.0: # %entry +; RV32ZBB-NEXT: ctz a0, a0 +; RV32ZBB-NEXT: li a1, 4 +; RV32ZBB-NEXT: sll a0, a1, a0 +; RV32ZBB-NEXT: ret +; +; RV64IILLEGALI32-LABEL: shl_cttz_constant_i16: +; RV64IILLEGALI32: # %bb.0: # %entry +; RV64IILLEGALI32-NEXT: addi a1, a0, -1 +; RV64IILLEGALI32-NEXT: not a0, a0 +; RV64IILLEGALI32-NEXT: and a0, a0, a1 +; RV64IILLEGALI32-NEXT: srli a1, a0, 1 +; RV64IILLEGALI32-NEXT: lui a2, 5 +; RV64IILLEGALI32-NEXT: addiw a2, a2, 1365 +; RV64IILLEGALI32-NEXT: and a1, a1, a2 +; RV64IILLEGALI32-NEXT: sub a0, a0, a1 +; RV64IILLEGALI32-NEXT: lui a1, 3 +; RV64IILLEGALI32-NEXT: addiw a1, a1, 819 +; RV64IILLEGALI32-NEXT: and a2, a0, a1 +; RV64IILLEGALI32-NEXT: srli a0, a0, 2 +; RV64IILLEGALI32-NEXT: and a0, a0, a1 +; RV64IILLEGALI32-NEXT: add a0, a2, a0 +; RV64IILLEGALI32-NEXT: srli a1, a0, 4 +; RV64IILLEGALI32-NEXT: add a0, a0, a1 +; RV64IILLEGALI32-NEXT: andi a1, a0, 15 +; RV64IILLEGALI32-NEXT: slli a0, a0, 52 +; RV64IILLEGALI32-NEXT: srli a0, a0, 60 +; RV64IILLEGALI32-NEXT: add a0, a1, a0 +; RV64IILLEGALI32-NEXT: li a1, 4 +; RV64IILLEGALI32-NEXT: sll a0, a1, a0 +; RV64IILLEGALI32-NEXT: ret +; +; RV64ZBBILLEGALI32-LABEL: shl_cttz_constant_i16: +; RV64ZBBILLEGALI32: # %bb.0: # %entry +; RV64ZBBILLEGALI32-NEXT: ctz a0, a0 +; RV64ZBBILLEGALI32-NEXT: li a1, 4 +; RV64ZBBILLEGALI32-NEXT: sll a0, a1, a0 +; RV64ZBBILLEGALI32-NEXT: ret +; +; RV64ILEGALI32-LABEL: shl_cttz_constant_i16: +; RV64ILEGALI32: # %bb.0: # %entry +; RV64ILEGALI32-NEXT: addi a1, a0, -1 +; RV64ILEGALI32-NEXT: not a0, a0 +; RV64ILEGALI32-NEXT: and a0, a0, a1 +; RV64ILEGALI32-NEXT: srliw a1, a0, 1 +; RV64ILEGALI32-NEXT: lui a2, 5 +; RV64ILEGALI32-NEXT: addi a2, a2, 1365 +; RV64ILEGALI32-NEXT: and a1, a1, a2 +; RV64ILEGALI32-NEXT: subw a0, a0, a1 +; RV64ILEGALI32-NEXT: lui a1, 3 +; RV64ILEGALI32-NEXT: addi a1, a1, 819 +; RV64ILEGALI32-NEXT: and a2, a0, a1 +; RV64ILEGALI32-NEXT: srliw a0, a0, 2 +; RV64ILEGALI32-NEXT: and a0, a0, a1 +; 
RV64ILEGALI32-NEXT: add a0, a2, a0 +; RV64ILEGALI32-NEXT: srliw a1, a0, 4 +; RV64ILEGALI32-NEXT: add a0, a0, a1 +; RV64ILEGALI32-NEXT: andi a1, a0, 15 +; RV64ILEGALI32-NEXT: slli a0, a0, 52 +; RV64ILEGALI32-NEXT: srli a0, a0, 60 +; RV64ILEGALI32-NEXT: add a0, a1, a0 +; RV64ILEGALI32-NEXT: li a1, 4 +; RV64ILEGALI32-NEXT: sllw a0, a1, a0 +; RV64ILEGALI32-NEXT: ret +; +; RV64ZBBLEGALI32-LABEL: shl_cttz_constant_i16: +; RV64ZBBLEGALI32: # %bb.0: # %entry +; RV64ZBBLEGALI32-NEXT: ctzw a0, a0 +; RV64ZBBLEGALI32-NEXT: li a1, 4 +; RV64ZBBLEGALI32-NEXT: sllw a0, a1, a0 +; RV64ZBBLEGALI32-NEXT: ret +entry: + %cttz = call i16 @llvm.cttz.i16(i16 %y, i1 true) + %res = shl i16 4, %cttz + ret i16 %res +} + +define i32 @shl_cttz_i32(i32 %x, i32 %y) { +; RV32I-LABEL: shl_cttz_i32: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: neg a2, a1 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: mul a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: shl_cttz_i32: +; RV32ZBB: # %bb.0: # %entry +; RV32ZBB-NEXT: ctz a1, a1 +; RV32ZBB-NEXT: sll a0, a0, a1 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: shl_cttz_i32: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: negw a2, a1 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a2, 30667 +; RV64I-NEXT: addi a2, a2, 1329 +; RV64I-NEXT: mul a1, a1, a2 +; RV64I-NEXT: srliw a1, a1, 27 +; RV64I-NEXT: lui a2, %hi(.LCPI4_0) +; RV64I-NEXT: addi a2, a2, %lo(.LCPI4_0) +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: lbu a1, 0(a1) +; RV64I-NEXT: sllw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: shl_cttz_i32: +; RV64ZBB: # %bb.0: # %entry +; RV64ZBB-NEXT: ctzw a1, a1 +; RV64ZBB-NEXT: sllw a0, a0, a1 +; RV64ZBB-NEXT: ret +entry: + %cttz = call i32 @llvm.cttz.i32(i32 %y, i1 true) + %res = shl i32 %x, %cttz + ret i32 %res +} + +define i32 @shl_cttz_i32_zero_is_defined(i32 %x, i32 %y) { +; RV32I-LABEL: shl_cttz_i32_zero_is_defined: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: beqz a1, .LBB5_2 +; RV32I-NEXT: # %bb.1: # %cond.false +; RV32I-NEXT: neg a2, a1 +; RV32I-NEXT: and a1, a1, a2 
+; RV32I-NEXT: lui a2, 30667 +; RV32I-NEXT: addi a2, a2, 1329 +; RV32I-NEXT: mul a1, a1, a2 +; RV32I-NEXT: srli a1, a1, 27 +; RV32I-NEXT: lui a2, %hi(.LCPI5_0) +; RV32I-NEXT: addi a2, a2, %lo(.LCPI5_0) +; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: lbu a1, 0(a1) +; RV32I-NEXT: sll a0, a0, a1 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB5_2: +; RV32I-NEXT: li a1, 32 +; RV32I-NEXT: sll a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: shl_cttz_i32_zero_is_defined: +; RV32ZBB: # %bb.0: # %entry +; RV32ZBB-NEXT: ctz a1, a1 +; RV32ZBB-NEXT: sll a0, a0, a1 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: shl_cttz_i32_zero_is_defined: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: sext.w a2, a1 +; RV64I-NEXT: beqz a2, .LBB5_2 +; RV64I-NEXT: # %bb.1: # %cond.false +; RV64I-NEXT: negw a2, a1 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a2, 30667 +; RV64I-NEXT: addi a2, a2, 1329 +; RV64I-NEXT: mul a1, a1, a2 +; RV64I-NEXT: srliw a1, a1, 27 +; RV64I-NEXT: lui a2, %hi(.LCPI5_0) +; RV64I-NEXT: addi a2, a2, %lo(.LCPI5_0) +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: lbu a1, 0(a1) +; RV64I-NEXT: sllw a0, a0, a1 +; RV64I-NEXT: ret +; RV64I-NEXT: .LBB5_2: +; RV64I-NEXT: li a1, 32 +; RV64I-NEXT: sllw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: shl_cttz_i32_zero_is_defined: +; RV64ZBB: # %bb.0: # %entry +; RV64ZBB-NEXT: ctzw a1, a1 +; RV64ZBB-NEXT: sllw a0, a0, a1 +; RV64ZBB-NEXT: ret +entry: + %cttz = call i32 @llvm.cttz.i32(i32 %y, i1 false) + %res = shl i32 %x, %cttz + ret i32 %res +} + +define i32 @shl_cttz_constant_i32(i32 %y) { +; RV32I-LABEL: shl_cttz_constant_i32: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: shl_cttz_constant_i32: +; RV32ZBB: # %bb.0: # %entry +; RV32ZBB-NEXT: ctz a0, a0 +; RV32ZBB-NEXT: li a1, 4 +; RV32ZBB-NEXT: sll a0, a1, a0 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: shl_cttz_constant_i32: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: negw a1, a0 +; RV64I-NEXT: and 
a0, a0, a1 +; RV64I-NEXT: lui a1, 30667 +; RV64I-NEXT: addi a1, a1, 1329 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: srliw a0, a0, 27 +; RV64I-NEXT: lui a1, %hi(.LCPI6_0) +; RV64I-NEXT: addi a1, a1, %lo(.LCPI6_0) +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: lbu a0, 0(a0) +; RV64I-NEXT: li a1, 4 +; RV64I-NEXT: sllw a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: shl_cttz_constant_i32: +; RV64ZBB: # %bb.0: # %entry +; RV64ZBB-NEXT: ctzw a0, a0 +; RV64ZBB-NEXT: li a1, 4 +; RV64ZBB-NEXT: sllw a0, a1, a0 +; RV64ZBB-NEXT: ret +entry: + %cttz = call i32 @llvm.cttz.i32(i32 %y, i1 true) + %res = shl i32 4, %cttz + ret i32 %res +} + +define i32 @shl_cttz_multiuse_i32(i32 %x, i32 %y) { +; RV32I-LABEL: shl_cttz_multiuse_i32: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: neg a2, a1 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: lui a2, 30667 +; RV32I-NEXT: addi a2, a2, 1329 +; RV32I-NEXT: mul a1, a1, a2 +; RV32I-NEXT: srli a1, a1, 27 +; RV32I-NEXT: lui a2, %hi(.LCPI7_0) +; RV32I-NEXT: addi a2, a2, %lo(.LCPI7_0) +; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: lbu s0, 0(a1) +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call use32 +; RV32I-NEXT: sll a0, s1, s0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: shl_cttz_multiuse_i32: +; RV32ZBB: # %bb.0: # %entry +; RV32ZBB-NEXT: addi sp, sp, -16 +; RV32ZBB-NEXT: .cfi_def_cfa_offset 16 +; RV32ZBB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32ZBB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32ZBB-NEXT: sw s1, 4(sp) # 4-byte 
Folded Spill +; RV32ZBB-NEXT: .cfi_offset ra, -4 +; RV32ZBB-NEXT: .cfi_offset s0, -8 +; RV32ZBB-NEXT: .cfi_offset s1, -12 +; RV32ZBB-NEXT: mv s0, a0 +; RV32ZBB-NEXT: ctz s1, a1 +; RV32ZBB-NEXT: mv a0, s1 +; RV32ZBB-NEXT: call use32 +; RV32ZBB-NEXT: sll a0, s0, s1 +; RV32ZBB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ZBB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32ZBB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32ZBB-NEXT: addi sp, sp, 16 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: shl_cttz_multiuse_i32: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: negw a2, a1 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a2, 30667 +; RV64I-NEXT: addi a2, a2, 1329 +; RV64I-NEXT: mul a1, a1, a2 +; RV64I-NEXT: srliw a1, a1, 27 +; RV64I-NEXT: lui a2, %hi(.LCPI7_0) +; RV64I-NEXT: addi a2, a2, %lo(.LCPI7_0) +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: lbu s0, 0(a1) +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call use32 +; RV64I-NEXT: sllw a0, s1, s0 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: shl_cttz_multiuse_i32: +; RV64ZBB: # %bb.0: # %entry +; RV64ZBB-NEXT: addi sp, sp, -32 +; RV64ZBB-NEXT: .cfi_def_cfa_offset 32 +; RV64ZBB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64ZBB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64ZBB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64ZBB-NEXT: .cfi_offset ra, -8 +; RV64ZBB-NEXT: .cfi_offset s0, -16 +; RV64ZBB-NEXT: .cfi_offset s1, -24 +; RV64ZBB-NEXT: mv s0, a0 +; RV64ZBB-NEXT: ctzw s1, a1 +; RV64ZBB-NEXT: mv 
a0, s1 +; RV64ZBB-NEXT: call use32 +; RV64ZBB-NEXT: sllw a0, s0, s1 +; RV64ZBB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64ZBB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64ZBB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64ZBB-NEXT: addi sp, sp, 32 +; RV64ZBB-NEXT: ret +entry: + %cttz = call i32 @llvm.cttz.i32(i32 %y, i1 true) + call void @use32(i32 %cttz) + %res = shl i32 %x, %cttz + ret i32 %res +} + +define i64 @shl_cttz_i64(i64 %x, i64 %y) { +; RV32I-LABEL: shl_cttz_i64: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a4, 30667 +; RV32I-NEXT: addi a5, a4, 1329 +; RV32I-NEXT: lui a4, %hi(.LCPI8_0) +; RV32I-NEXT: addi a4, a4, %lo(.LCPI8_0) +; RV32I-NEXT: bnez a2, .LBB8_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: neg a2, a3 +; RV32I-NEXT: and a2, a3, a2 +; RV32I-NEXT: mul a2, a2, a5 +; RV32I-NEXT: srli a2, a2, 27 +; RV32I-NEXT: add a2, a4, a2 +; RV32I-NEXT: lbu a2, 0(a2) +; RV32I-NEXT: addi a4, a2, 32 +; RV32I-NEXT: j .LBB8_3 +; RV32I-NEXT: .LBB8_2: +; RV32I-NEXT: neg a3, a2 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: mul a2, a2, a5 +; RV32I-NEXT: srli a2, a2, 27 +; RV32I-NEXT: add a2, a4, a2 +; RV32I-NEXT: lbu a4, 0(a2) +; RV32I-NEXT: .LBB8_3: # %entry +; RV32I-NEXT: addi a3, a4, -32 +; RV32I-NEXT: sll a2, a0, a4 +; RV32I-NEXT: bltz a3, .LBB8_5 +; RV32I-NEXT: # %bb.4: # %entry +; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: j .LBB8_6 +; RV32I-NEXT: .LBB8_5: +; RV32I-NEXT: sll a1, a1, a4 +; RV32I-NEXT: not a4, a4 +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: srl a0, a0, a4 +; RV32I-NEXT: or a1, a1, a0 +; RV32I-NEXT: .LBB8_6: # %entry +; RV32I-NEXT: srai a0, a3, 31 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: shl_cttz_i64: +; RV32ZBB: # %bb.0: # %entry +; RV32ZBB-NEXT: bnez a2, .LBB8_2 +; RV32ZBB-NEXT: # %bb.1: # %entry +; RV32ZBB-NEXT: ctz a2, a3 +; RV32ZBB-NEXT: addi a4, a2, 32 +; RV32ZBB-NEXT: j .LBB8_3 +; RV32ZBB-NEXT: .LBB8_2: +; RV32ZBB-NEXT: ctz a4, a2 +; RV32ZBB-NEXT: .LBB8_3: # %entry +; RV32ZBB-NEXT: addi a3, a4, -32 +; 
RV32ZBB-NEXT: sll a2, a0, a4 +; RV32ZBB-NEXT: bltz a3, .LBB8_5 +; RV32ZBB-NEXT: # %bb.4: # %entry +; RV32ZBB-NEXT: mv a1, a2 +; RV32ZBB-NEXT: j .LBB8_6 +; RV32ZBB-NEXT: .LBB8_5: +; RV32ZBB-NEXT: sll a1, a1, a4 +; RV32ZBB-NEXT: not a4, a4 +; RV32ZBB-NEXT: srli a0, a0, 1 +; RV32ZBB-NEXT: srl a0, a0, a4 +; RV32ZBB-NEXT: or a1, a1, a0 +; RV32ZBB-NEXT: .LBB8_6: # %entry +; RV32ZBB-NEXT: srai a0, a3, 31 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: shl_cttz_i64: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: neg a2, a1 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: mul a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: shl_cttz_i64: +; RV64ZBB: # %bb.0: # %entry +; RV64ZBB-NEXT: ctz a1, a1 +; RV64ZBB-NEXT: sll a0, a0, a1 +; RV64ZBB-NEXT: ret +entry: + %cttz = call i64 @llvm.cttz.i64(i64 %y, i1 true) + %res = shl i64 %x, %cttz + ret i64 %res +} + +define i64 @shl_cttz_constant_i64(i64 %y) { +; RV32I-LABEL: shl_cttz_constant_i64: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a2, 30667 +; RV32I-NEXT: addi a3, a2, 1329 +; RV32I-NEXT: lui a2, %hi(.LCPI9_0) +; RV32I-NEXT: addi a2, a2, %lo(.LCPI9_0) +; RV32I-NEXT: bnez a0, .LBB9_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: neg a0, a1 +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: mul a0, a0, a3 +; RV32I-NEXT: srli a0, a0, 27 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: lbu a0, 0(a0) +; RV32I-NEXT: addi a1, a0, 32 +; RV32I-NEXT: j .LBB9_3 +; RV32I-NEXT: .LBB9_2: +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: mul a0, a0, a3 +; RV32I-NEXT: srli a0, a0, 27 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: lbu a1, 0(a0) +; RV32I-NEXT: .LBB9_3: # %entry +; RV32I-NEXT: li a0, 4 +; RV32I-NEXT: addi a2, a1, -32 +; RV32I-NEXT: sll a0, a0, a1 +; RV32I-NEXT: bltz a2, .LBB9_5 +; RV32I-NEXT: # %bb.4: # %entry +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: j .LBB9_6 +; RV32I-NEXT: .LBB9_5: +; RV32I-NEXT: not a1, a1 +; RV32I-NEXT: li a3, 2 +; RV32I-NEXT: srl a1, a3, a1 +; RV32I-NEXT: .LBB9_6: # %entry +; 
RV32I-NEXT: srai a2, a2, 31 +; RV32I-NEXT: and a0, a2, a0 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: shl_cttz_constant_i64: +; RV32ZBB: # %bb.0: # %entry +; RV32ZBB-NEXT: bnez a0, .LBB9_2 +; RV32ZBB-NEXT: # %bb.1: # %entry +; RV32ZBB-NEXT: ctz a0, a1 +; RV32ZBB-NEXT: addi a1, a0, 32 +; RV32ZBB-NEXT: j .LBB9_3 +; RV32ZBB-NEXT: .LBB9_2: +; RV32ZBB-NEXT: ctz a1, a0 +; RV32ZBB-NEXT: .LBB9_3: # %entry +; RV32ZBB-NEXT: li a0, 4 +; RV32ZBB-NEXT: addi a2, a1, -32 +; RV32ZBB-NEXT: sll a0, a0, a1 +; RV32ZBB-NEXT: bltz a2, .LBB9_5 +; RV32ZBB-NEXT: # %bb.4: # %entry +; RV32ZBB-NEXT: mv a1, a0 +; RV32ZBB-NEXT: j .LBB9_6 +; RV32ZBB-NEXT: .LBB9_5: +; RV32ZBB-NEXT: not a1, a1 +; RV32ZBB-NEXT: li a3, 2 +; RV32ZBB-NEXT: srl a1, a3, a1 +; RV32ZBB-NEXT: .LBB9_6: # %entry +; RV32ZBB-NEXT: srai a2, a2, 31 +; RV32ZBB-NEXT: and a0, a2, a0 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: shl_cttz_constant_i64: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: shl_cttz_constant_i64: +; RV64ZBB: # %bb.0: # %entry +; RV64ZBB-NEXT: ctz a0, a0 +; RV64ZBB-NEXT: li a1, 4 +; RV64ZBB-NEXT: sll a0, a1, a0 +; RV64ZBB-NEXT: ret +entry: + %cttz = call i64 @llvm.cttz.i64(i64 %y, i1 true) + %res = shl i64 4, %cttz + ret i64 %res +} + +declare void @use32(i32 signext) diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll index 87d69bfad38c2..d3e495bb723ad 100644 --- a/llvm/test/CodeGen/RISCV/tail-calls.ll +++ b/llvm/test/CodeGen/RISCV/tail-calls.ll @@ -56,12 +56,12 @@ define void @caller_indirect_tail(i32 %a) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: beqz a0, .LBB3_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: lui a0, %hi(callee_indirect2) -; CHECK-NEXT: addi t1, a0, %lo(callee_indirect2) +; CHECK-NEXT: lui t1, %hi(callee_indirect2) +; CHECK-NEXT: addi t1, t1, %lo(callee_indirect2) ; CHECK-NEXT: jr t1 ; CHECK-NEXT: .LBB3_2: -; CHECK-NEXT: lui a0, 
%hi(callee_indirect1) -; CHECK-NEXT: addi t1, a0, %lo(callee_indirect1) +; CHECK-NEXT: lui t1, %hi(callee_indirect1) +; CHECK-NEXT: addi t1, t1, %lo(callee_indirect1) ; CHECK-NEXT: jr t1 diff --git a/llvm/test/CodeGen/RISCV/unroll-loop-cse.ll b/llvm/test/CodeGen/RISCV/unroll-loop-cse.ll index 2fd4572d23456..6530736304837 100644 --- a/llvm/test/CodeGen/RISCV/unroll-loop-cse.ll +++ b/llvm/test/CodeGen/RISCV/unroll-loop-cse.ll @@ -10,36 +10,30 @@ define signext i32 @unroll_loop_cse() { ; CHECK-LABEL: unroll_loop_cse: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(x) -; CHECK-NEXT: lw a3, %lo(x)(a1) -; CHECK-NEXT: lui a2, %hi(check) -; CHECK-NEXT: lw a4, %lo(check)(a2) +; CHECK-NEXT: lui a0, %hi(x) +; CHECK-NEXT: lw a1, %lo(x)(a0) +; CHECK-NEXT: lui a0, %hi(check) +; CHECK-NEXT: lw a2, %lo(check)(a0) ; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: bne a3, a4, .LBB0_6 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: addi a1, a1, %lo(x) -; CHECK-NEXT: lw a1, 4(a1) -; CHECK-NEXT: addi a2, a2, %lo(check) -; CHECK-NEXT: lw a2, 4(a2) ; CHECK-NEXT: bne a1, a2, .LBB0_6 -; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a1, %hi(x) ; CHECK-NEXT: addi a1, a1, %lo(x) -; CHECK-NEXT: lw a3, 8(a1) +; CHECK-NEXT: lw a3, 4(a1) ; CHECK-NEXT: lui a2, %hi(check) ; CHECK-NEXT: addi a2, a2, %lo(check) +; CHECK-NEXT: lw a4, 4(a2) +; CHECK-NEXT: bne a3, a4, .LBB0_6 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: lw a3, 8(a1) ; CHECK-NEXT: lw a4, 8(a2) ; CHECK-NEXT: bne a3, a4, .LBB0_6 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: lw a1, 12(a1) -; CHECK-NEXT: lw a2, 12(a2) -; CHECK-NEXT: bne a1, a2, .LBB0_6 +; CHECK-NEXT: lw a3, 12(a1) +; CHECK-NEXT: lw a4, 12(a2) +; CHECK-NEXT: bne a3, a4, .LBB0_6 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: lui a1, %hi(x) -; CHECK-NEXT: addi a1, a1, %lo(x) ; CHECK-NEXT: lw a3, 16(a1) -; CHECK-NEXT: lui a2, %hi(check) -; CHECK-NEXT: addi a2, a2, %lo(check) ; CHECK-NEXT: lw a4, 16(a2) ; CHECK-NEXT: bne a3, a4, .LBB0_6 ; CHECK-NEXT: # %bb.5: diff --git a/llvm/test/CodeGen/RISCV/vlenb.ll 
b/llvm/test/CodeGen/RISCV/vlenb.ll index 1d6c1b5d1acbd..26d4f99c3b979 100644 --- a/llvm/test/CodeGen/RISCV/vlenb.ll +++ b/llvm/test/CodeGen/RISCV/vlenb.ll @@ -71,10 +71,13 @@ define void @machine_licm() { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -4 +; CHECK-NEXT: .cfi_offset s0, -8 +; CHECK-NEXT: csrr s0, vlenb ; CHECK-NEXT: .LBB4_1: # %loop ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a0, s0 ; CHECK-NEXT: call use ; CHECK-NEXT: j .LBB4_1 entry: diff --git a/llvm/test/CodeGen/SPIRV/event-zero-const.ll b/llvm/test/CodeGen/SPIRV/event-zero-const.ll new file mode 100644 index 0000000000000..b40456d233f12 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/event-zero-const.ll @@ -0,0 +1,23 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: %[[#LongTy:]] = OpTypeInt 64 0 +; CHECK: %[[#EventTy:]] = OpTypeEvent +; CHECK: %[[#LongNull:]] = OpConstantNull %[[#LongTy]] +; CHECK: %[[#EventNull:]] = OpConstantNull %[[#EventTy]] +; CHECK: OpFunction +; CHECK: OpINotEqual %[[#]] %[[#]] %[[#LongNull]] +; CHECK: OpGroupAsyncCopy %[[#EventTy]] %[[#]] %[[#]] %[[#]] %[[#]] %[[#]] %[[#EventNull]] + + +define weak_odr dso_local spir_kernel void @foo(i64 %_arg_i, ptr addrspace(1) %_arg_ptr, ptr addrspace(3) %_arg_local) { +entry: + %r1 = icmp ne i64 %_arg_i, 0 + %e1 = tail call spir_func target("spirv.Event") @__spirv_GroupAsyncCopy(i32 2, ptr addrspace(3) %_arg_local, ptr addrspace(1) %_arg_ptr, i64 1, i64 1, target("spirv.Event") zeroinitializer) + ret void +} + +declare 
dso_local spir_func target("spirv.Event") @__spirv_GroupAsyncCopy(i32, ptr addrspace(3), ptr addrspace(1), i64, i64, target("spirv.Event")) diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_cache_controls/basic-load-store.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_cache_controls/basic-load-store.ll new file mode 100644 index 0000000000000..359f6d1c0f8e5 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_cache_controls/basic-load-store.ll @@ -0,0 +1,53 @@ +; Adapted from https://github.com/KhronosGroup/SPIRV-LLVM-Translator/tree/main/test/extensions/INTEL/SPV_INTEL_cache_controls + +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_cache_controls %s -o - | FileCheck %s --check-prefixes=CHECK-SPIRV +; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_cache_controls %s -o - -filetype=obj | spirv-val %} + +; CHECK-SPIRV: Capability CacheControlsINTEL +; CHECK-SPIRV: Extension "SPV_INTEL_cache_controls" +; CHECK-SPIRV-DAG: OpName %[[#GVar:]] "G" +; CHECK-SPIRV-DAG: OpName %[[#Arg:]] "buffer" +; CHECK-SPIRV-DAG: OpDecorate %[[#GVar]] CacheControlStoreINTEL 0 1 +; CHECK-SPIRV-DAG: OpDecorate %[[#GVar]] CacheControlStoreINTEL 1 3 +; CHECK-SPIRV-DAG: OpDecorate %[[#Arg]] CacheControlLoadINTEL 0 0 +; CHECK-SPIRV-DAG: OpDecorate %[[#Arg]] CacheControlStoreINTEL 0 1 +; CHECK-SPIRV-DAG: OpDecorate %[[#LoadPtr:]] CacheControlLoadINTEL 0 1 +; CHECK-SPIRV-DAG: OpDecorate %[[#LoadPtr]] CacheControlLoadINTEL 1 1 +; CHECK-SPIRV-DAG: OpDecorate %[[#StorePtr:]] CacheControlStoreINTEL 0 1 +; CHECK-SPIRV-DAG: OpDecorate %[[#StorePtr]] CacheControlStoreINTEL 1 2 +; CHECK-SPIRV: OpLoad %[[#]] %[[#LoadPtr]] +; CHECK-SPIRV: OpStore %[[#StorePtr]] %[[#]] + +@G = common addrspace(1) global i32 0, align 4, !spirv.Decorations !9 + +define spir_kernel void @test(ptr addrspace(1) %dummy, ptr addrspace(1) %buffer) !spirv.ParameterDecorations !12 { +entry: + %arrayidx = getelementptr inbounds i32, ptr 
addrspace(1) %buffer, i64 1, !spirv.Decorations !3 + %0 = load i32, ptr addrspace(1) %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr addrspace(1) %buffer, i64 0, !spirv.Decorations !6 + store i32 %0, ptr addrspace(1) %arrayidx1, align 4 + ret void +} + +!spirv.MemoryModel = !{!0} +!spirv.Source = !{!1} +!opencl.spir.version = !{!2} +!opencl.ocl.version = !{!2} + +!0 = !{i32 2, i32 2} +!1 = !{i32 3, i32 102000} +!2 = !{i32 1, i32 2} +!3 = !{!4, !5} +!4 = !{i32 6442, i32 0, i32 1} ; {CacheControlLoadINTEL, CacheLevel=0, Cached} +!5 = !{i32 6442, i32 1, i32 1} ; {CacheControlLoadINTEL, CacheLevel=1, Cached} +!6 = !{!7, !8} +!7 = !{i32 6443, i32 0, i32 1} ; {CacheControlStoreINTEL, CacheLevel=0, WriteThrough} +!8 = !{i32 6443, i32 1, i32 2} ; {CacheControlStoreINTEL, CacheLevel=1, WriteBack} +!9 = !{!10, !11} +!10 = !{i32 6443, i32 0, i32 1} ; {CacheControlStoreINTEL, CacheLevel=0, WriteThrough} +!11 = !{i32 6443, i32 1, i32 3} ; {CacheControlStoreINTEL, CacheLevel=1, Streaming} +!12 = !{!13, !14} +!13 = !{} +!14 = !{!15, !16} +!15 = !{i32 6442, i32 0, i32 0} ; {CacheControlLoadINTEL, CacheLevel=0, Uncached} +!16 = !{i32 6443, i32 0, i32 1} ; {CacheControlStoreINTEL, CacheLevel=0, WriteThrough} diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_cache_controls/decorate-prefetch-w-cache-controls.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_cache_controls/decorate-prefetch-w-cache-controls.ll new file mode 100644 index 0000000000000..9a13b720f61f7 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_cache_controls/decorate-prefetch-w-cache-controls.ll @@ -0,0 +1,44 @@ +; Adapted from https://github.com/KhronosGroup/SPIRV-LLVM-Translator/tree/main/test/extensions/INTEL/SPV_INTEL_cache_controls + +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_cache_controls %s -o - | FileCheck %s --check-prefixes=CHECK-SPIRV +; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown 
--spirv-ext=+SPV_INTEL_cache_controls %s -o - -filetype=obj | spirv-val %} + +; CHECK-SPIRV: Capability CacheControlsINTEL +; CHECK-SPIRV: Extension "SPV_INTEL_cache_controls" + +; CHECK-SPIRV-DAG: OpName %[[#Ptr1:]] "ptr1" +; CHECK-SPIRV-DAG: OpName %[[#Ptr2:]] "ptr2" +; CHECK-SPIRV-DAG: OpName %[[#Ptr3:]] "ptr3" +; CHECK-SPIRV-DAG: OpDecorate %[[#Ptr1]] CacheControlLoadINTEL 0 1 +; CHECK-SPIRV-DAG: OpDecorate %[[#Ptr2]] CacheControlLoadINTEL 1 1 +; CHECK-SPIRV-DAG: OpDecorate %[[#Ptr3]] CacheControlStoreINTEL 2 3 +; CHECK-SPIRV: OpExtInst %[[#]] %[[#]] prefetch %[[#Ptr1]] %[[#]] +; CHECK-SPIRV: OpExtInst %[[#]] %[[#]] prefetch %[[#Ptr2]] %[[#]] +; CHECK-SPIRV: OpExtInst %[[#]] %[[#]] prefetch %[[#Ptr3]] %[[#]] + +; 6442 stands for CacheControlLoadINTEL token +@.str.1 = private unnamed_addr addrspace(1) constant [16 x i8] c"../prefetch.hpp\00", section "llvm.metadata" +@.str.9 = private unnamed_addr addrspace(1) constant [13 x i8] c"{6442:\220,1\22}\00", section "llvm.metadata" +@.str.10 = private unnamed_addr addrspace(1) constant [13 x i8] c"{6442:\221,1\22}\00", section "llvm.metadata" +@.str.11 = private unnamed_addr addrspace(1) constant [13 x i8] c"{6443:\222,3\22}\00", section "llvm.metadata" + +define weak_odr dso_local spir_kernel void @foo(ptr addrspace(1) noundef align 1 %_arg_dataPtr) { +entry: + %r0 = addrspacecast ptr addrspace(1) %_arg_dataPtr to ptr addrspace(4) + %ptr1 = tail call spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef %r0, i32 noundef 5) + %r1 = tail call ptr addrspace(1) @llvm.ptr.annotation.p1.p1(ptr addrspace(1) %ptr1, ptr addrspace(1) @.str.9, ptr addrspace(1) @.str.1, i32 76, ptr addrspace(1) null) + tail call spir_func void @_Z20__spirv_ocl_prefetchPU3AS1Kcm(ptr addrspace(1) noundef %r1, i64 noundef 1) + %arrayidx3.i = getelementptr inbounds i8, ptr addrspace(4) %r0, i64 1 + %ptr2 = tail call spir_func noundef ptr addrspace(1) 
@_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef %arrayidx3.i, i32 noundef 5) + %r2 = tail call ptr addrspace(1) @llvm.ptr.annotation.p1.p1(ptr addrspace(1) %ptr2, ptr addrspace(1) @.str.10, ptr addrspace(1) @.str.1, i32 80, ptr addrspace(1) null) + tail call spir_func void @_Z20__spirv_ocl_prefetchPU3AS1Kcm(ptr addrspace(1) noundef %r2, i64 noundef 1) + %arrayidx7.i = getelementptr inbounds i8, ptr addrspace(4) %r0, i64 2 + %ptr3 = tail call spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef %arrayidx7.i, i32 noundef 5) + %r3 = tail call ptr addrspace(1) @llvm.ptr.annotation.p1.p1(ptr addrspace(1) %ptr3, ptr addrspace(1) @.str.11, ptr addrspace(1) @.str.1, i32 80, ptr addrspace(1) null) + tail call spir_func void @_Z20__spirv_ocl_prefetchPU3AS1Kcm(ptr addrspace(1) noundef %r3, i64 noundef 2) + ret void +} + +declare ptr addrspace(1) @llvm.ptr.annotation.p1.p1(ptr addrspace(1), ptr addrspace(1), ptr addrspace(1), i32, ptr addrspace(1)) +declare dso_local spir_func void @_Z20__spirv_ocl_prefetchPU3AS1Kcm(ptr addrspace(1) noundef, i64 noundef) +declare dso_local spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef, i32 noundef) diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_global_variable_fpga_decorations/global-var-decorations.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_global_variable_fpga_decorations/global-var-decorations.ll new file mode 100644 index 0000000000000..40008873bf19b --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_global_variable_fpga_decorations/global-var-decorations.ll @@ -0,0 +1,33 @@ +; Adapted from https://github.com/KhronosGroup/SPIRV-LLVM-Translator/tree/main/test/extensions/INTEL/SPV_INTEL_global_variable_fpga_decorations + +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_global_variable_fpga_decorations %s -o - | FileCheck %s 
--check-prefixes=CHECK-SPIRV +; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_global_variable_fpga_decorations %s -o - -filetype=obj | spirv-val %} + +; CHECK-SPIRV: Capability GlobalVariableFPGADecorationsINTEL +; CHECK-SPIRV: Extension "SPV_INTEL_global_variable_fpga_decorations" +; CHECK-SPIRV-DAG: OpName %[[#G1:]] "int_var" +; CHECK-SPIRV-DAG: OpName %[[#G2:]] "float_var" +; CHECK-SPIRV-DAG: OpName %[[#G3:]] "bool_var" +; CHECK-SPIRV-DAG: OpDecorate %[[#G1]] ImplementInRegisterMapINTEL 1 +; CHECK-SPIRV-DAG: OpDecorate %[[#G1]] InitModeINTEL 0 +; CHECK-SPIRV-DAG: OpDecorate %[[#G2]] ImplementInRegisterMapINTEL 1 +; CHECK-SPIRV-DAG: OpDecorate %[[#G2]] InitModeINTEL 1 +; CHECK-SPIRV-DAG: OpDecorate %[[#G3]] ImplementInRegisterMapINTEL 0 +; CHECK-SPIRV-DAG: OpDecorate %[[#G3]] InitModeINTEL 0 + +@int_var = addrspace(1) global i32 42, !spirv.Decorations !1 +@float_var = addrspace(1) global float 1.0, !spirv.Decorations !5 +@bool_var = addrspace(1) global i1 0, !spirv.Decorations !7 + +define spir_kernel void @test() { +entry: + ret void +} + +!1 = !{!2, !3} +!2 = !{i32 6191, i1 true} ; ImplementInRegisterMapINTEL = true +!3 = !{i32 6190, i32 0} ; InitModeINTEL = 0 +!4 = !{i32 6190, i32 1} ; InitModeINTEL = 1 +!5 = !{!2, !4} +!6 = !{i32 6191, i1 false} ; ImplementInRegisterMapINTEL = false +!7 = !{!6, !3} diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_global_variable_host_access/global-var-host-access.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_global_variable_host_access/global-var-host-access.ll new file mode 100644 index 0000000000000..1397435efb2d4 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_global_variable_host_access/global-var-host-access.ll @@ -0,0 +1,34 @@ +; Adapted from https://github.com/KhronosGroup/SPIRV-LLVM-Translator/tree/main/test/extensions/INTEL/SPV_INTEL_global_variable_host_access + +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown 
--spirv-ext=+SPV_INTEL_global_variable_host_access,+SPV_INTEL_global_variable_fpga_decorations %s -o - | FileCheck %s --check-prefixes=CHECK-SPIRV +; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_global_variable_host_access,+SPV_INTEL_global_variable_fpga_decorations %s -o - -filetype=obj | spirv-val %} + +; CHECK-SPIRV-DAG: Capability GlobalVariableHostAccessINTEL +; CHECK-SPIRV-DAG: Capability GlobalVariableFPGADecorationsINTEL +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_global_variable_host_access" +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_global_variable_fpga_decorations" + +; CHECK-SPIRV-DAG: OpName %[[#G1:]] "int_var" +; CHECK-SPIRV-DAG: OpName %[[#G2:]] "bool_var" +; CHECK-SPIRV-DAG: OpName %[[#G3:]] "float_var" +; CHECK-SPIRV-DAG: OpDecorate %[[#G1]] HostAccessINTEL 1 "IntVarName" +; CHECK-SPIRV-DAG: OpDecorate %[[#G2]] HostAccessINTEL 3 "BoolVarName" +; CHECK-SPIRV-DAG: OpDecorate %[[#G3]] ImplementInRegisterMapINTEL 1 +; CHECK-SPIRV-DAG: OpDecorate %[[#G3]] InitModeINTEL 1 + +@int_var = addrspace(1) global i32 42, !spirv.Decorations !1 +@bool_var = addrspace(1) global i1 0, !spirv.Decorations !4 +@float_var = addrspace(1) global float 1.0, !spirv.Decorations !5 + +define spir_kernel void @test() { +entry: + ret void +} + +!1 = !{!2} +!2 = !{i32 6188, i32 1, !"IntVarName"} ; HostAccessINTEL 1 "IntVarName" +!3 = !{i32 6188, i32 3, !"BoolVarName"} ; HostAccessINTEL 3 "BoolVarName" +!4 = !{!3} +!5 = !{!6, !7} +!6 = !{i32 6191, i1 true} ; ImplementInRegisterMapINTEL = true +!7 = !{i32 6190, i32 1} ; InitModeINTEL = 1 diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/ptr-annotation.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/ptr-annotation.ll new file mode 100644 index 0000000000000..06f1d0bf7fd37 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/ptr-annotation.ll @@ -0,0 +1,41 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 
-mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: OpName %[[#Foo:]] "foo" +; CHECK-DAG: OpName %[[#Ptr1:]] "_arg1" +; CHECK-DAG: OpName %[[#Ptr2:]] "_arg2" +; CHECK-DAG: OpName %[[#Ptr3:]] "_arg3" +; CHECK-DAG: OpName %[[#Ptr4:]] "_arg4" +; CHECK-DAG: OpName %[[#Ptr5:]] "_arg5" +; CHECK-DAG: OpDecorate %[[#Ptr1]] NonReadable +; CHECK-DAG: OpDecorate %[[#Ptr2]] Alignment 128 +; CHECK-DAG: OpDecorate %[[#Ptr2]] NonReadable +; CHECK-DAG: OpDecorate %[[#Ptr3]] Alignment 128 +; CHECK-DAG: OpDecorate %[[#Ptr3]] NonReadable +; CHECK-DAG: OpDecorate %[[#Ptr4]] Alignment 128 +; CHECK-DAG: OpDecorate %[[#Ptr4]] NonReadable +; CHECK-DAG: OpDecorate %[[#Ptr5]] UserSemantic "Unknown format" +; CHECK: %[[#Foo]] = OpFunction +; CHECK-NEXT: %[[#Ptr1]] = OpFunctionParameter +; CHECK-NEXT: %[[#Ptr2]] = OpFunctionParameter +; CHECK-NEXT: %[[#Ptr3]] = OpFunctionParameter +; CHECK-NEXT: %[[#Ptr4]] = OpFunctionParameter +; CHECK-NEXT: %[[#Ptr5]] = OpFunctionParameter +; CHECK: OpFunctionEnd + +@.str.0 = private unnamed_addr addrspace(1) constant [16 x i8] c"../prefetch.hpp\00", section "llvm.metadata" +@.str.1 = private unnamed_addr addrspace(1) constant [5 x i8] c"{25}\00", section "llvm.metadata" +@.str.2 = private unnamed_addr addrspace(1) constant [13 x i8] c"{44:128}{25}\00", section "llvm.metadata" +@.str.3 = private unnamed_addr addrspace(1) constant [15 x i8] c"{44:\22128\22}{25}\00", section "llvm.metadata" +@.str.4 = private unnamed_addr addrspace(1) constant [13 x i8] c"{44,128}{25}\00", section "llvm.metadata" +@.str.5 = private unnamed_addr addrspace(1) constant [15 x i8] c"Unknown format\00", section "llvm.metadata" + +define spir_kernel void @foo(ptr addrspace(1) %_arg1, ptr addrspace(1) %_arg2, ptr addrspace(1) %_arg3, ptr addrspace(1) %_arg4, ptr addrspace(1) %_arg5) { +entry: + %r1 = tail call ptr addrspace(1) @llvm.ptr.annotation.p1.p1(ptr addrspace(1) %_arg1, ptr addrspace(1) @.str.1, ptr addrspace(1) @.str.0, i32 80, ptr 
addrspace(1) null) + %r2 = tail call ptr addrspace(1) @llvm.ptr.annotation.p1.p1(ptr addrspace(1) %_arg2, ptr addrspace(1) @.str.2, ptr addrspace(1) @.str.0, i32 80, ptr addrspace(1) null) + %r3 = tail call ptr addrspace(1) @llvm.ptr.annotation.p1.p1(ptr addrspace(1) %_arg3, ptr addrspace(1) @.str.3, ptr addrspace(1) @.str.0, i32 80, ptr addrspace(1) null) + %r4 = tail call ptr addrspace(1) @llvm.ptr.annotation.p1.p1(ptr addrspace(1) %_arg4, ptr addrspace(1) @.str.4, ptr addrspace(1) @.str.0, i32 80, ptr addrspace(1) null) + %r5 = tail call ptr addrspace(1) @llvm.ptr.annotation.p1.p1(ptr addrspace(1) %_arg5, ptr addrspace(1) @.str.5, ptr addrspace(1) @.str.0, i32 80, ptr addrspace(1) null) + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/phi-spvintrinsic-dominate.ll b/llvm/test/CodeGen/SPIRV/phi-spvintrinsic-dominate.ll new file mode 100644 index 0000000000000..471ab03ed89f6 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/phi-spvintrinsic-dominate.ll @@ -0,0 +1,39 @@ +; The goal of the test is to check that newly inserted internal (spv) +; intrinsic functions for PHI's operands are inserted at the correct +; positions, and don't break rules of instruction domination and PHI nodes +; grouping at top of basic block. 
+ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpFunction +; CHECK: OpBranch +; CHECK: OpLabel +; CHECK: OpPhi +; CHECK: OpPhi +; CHECK: OpPhi + +define spir_kernel void @foo(ptr addrspace(1) %_arg1) { +entry: + br label %l1 + +l1: + %sw = phi <4 x double> [ %vec, %l2 ], [ , %entry ] + %in = phi <3 x double> [ %ins, %l2 ], [ zeroinitializer, %entry ] + %r1 = phi i32 [ %r2, %l2 ], [ 0, %entry ] + %c1 = icmp ult i32 %r1, 3 + br i1 %c1, label %l2, label %exit + +l2: + %r3 = zext nneg i32 %r1 to i64 + %r4 = getelementptr inbounds double, ptr addrspace(1) %_arg1, i64 %r3 + %r5 = load double, ptr addrspace(1) %r4, align 8 + %ins = insertelement <3 x double> %in, double %r5, i32 %r1 + %exp = shufflevector <3 x double> %ins, <3 x double> poison, <4 x i32> + %vec = shufflevector <4 x double> %exp, <4 x double> %sw, <4 x i32> + %r2 = add nuw nsw i32 %r1, 1 + br label %l1 + +exit: + ret void +} diff --git a/llvm/test/CodeGen/Thumb2/mve-tailpred-vptblock.ll b/llvm/test/CodeGen/Thumb2/mve-tailpred-vptblock.ll new file mode 100644 index 0000000000000..f9b3757bb6d2c --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-tailpred-vptblock.ll @@ -0,0 +1,197 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s + +; This loop has a vpt block that should not block tailpredication +define void @convert_vptblock(ptr %pchTarget, i16 signext %iTargetStride, ptr %pwLineMask, ptr %ptCopySize, i8 zeroext %chColour, i8 zeroext %chOpacity) { +; CHECK-LABEL: convert_vptblock: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: ldrsh.w r12, [r3, #2] +; CHECK-NEXT: cmp.w r12, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB0_1: @ %for.body.lr.ph +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; 
CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: ldrsh.w r10, [r3] +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: ldrd r4, r5, [sp, #88] +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: cmp.w r10, #8 +; CHECK-NEXT: mov.w r0, #1 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: it ge +; CHECK-NEXT: movge r3, #8 +; CHECK-NEXT: vidup.u16 q0, r8, #4 +; CHECK-NEXT: sub.w r3, r10, r3 +; CHECK-NEXT: vmov.i32 q1, #0x0 +; CHECK-NEXT: adds r3, #7 +; CHECK-NEXT: vmov.i16 q2, #0x100 +; CHECK-NEXT: vmov.i16 q3, #0xff +; CHECK-NEXT: add.w r9, r0, r3, lsr #3 +; CHECK-NEXT: .LBB0_2: @ %for.body +; CHECK-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-NEXT: @ Child Loop BB0_3 Depth 2 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: mov r6, r8 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: dls lr, r9 +; CHECK-NEXT: .LBB0_3: @ %do.body +; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1 +; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-NEXT: vctp.16 r3 +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrbt.u16 q5, [r2, q4] +; CHECK-NEXT: vmul.i16 q4, q5, r5 +; CHECK-NEXT: vshr.u16 q4, q4, #8 +; CHECK-NEXT: vsub.i16 q5, q2, q4 +; CHECK-NEXT: vpt.i16 eq, q4, q3 +; CHECK-NEXT: vmovt q5, q1 +; CHECK-NEXT: vctp.16 r3 +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrbt.u16 q6, [r0] +; CHECK-NEXT: vsub.i16 q4, q2, q5 +; CHECK-NEXT: subs r3, #8 +; CHECK-NEXT: vmul.i16 q5, q5, q6 +; CHECK-NEXT: vmla.i16 q5, q4, r4 +; CHECK-NEXT: vshr.u16 q4, q5, #8 +; CHECK-NEXT: vpst +; CHECK-NEXT: vstrbt.16 q4, [r0], #8 +; CHECK-NEXT: vidup.u16 q4, r6, #4 +; CHECK-NEXT: le lr, .LBB0_3 +; CHECK-NEXT: @ %bb.4: @ %do.end +; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: add.w r0, r11, #1 +; CHECK-NEXT: add r7, r1 +; CHECK-NEXT: sxth.w r11, r0 +; CHECK-NEXT: cmp r11, r12 +; CHECK-NEXT: blt .LBB0_2 +; CHECK-NEXT: @ %bb.5: +; 
CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: bx lr +entry: + %iHeight1 = getelementptr inbounds i8, ptr %ptCopySize, i32 2 + %0 = load i16, ptr %iHeight1, align 2 + %cmp28 = icmp sgt i16 %0, 0 + br i1 %cmp28, label %for.body.lr.ph, label %for.cond.cleanup + +for.body.lr.ph: ; preds = %entry + %1 = load i16, ptr %ptCopySize, align 2 + %conv5 = sext i16 %1 to i32 + %2 = tail call { <8 x i16>, i32 } @llvm.arm.mve.vidup.v8i16(i32 0, i32 4) + %conv6 = zext i8 %chOpacity to i16 + %.splatinsert = insertelement <8 x i16> poison, i16 %conv6, i64 0 + %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> poison, <8 x i32> zeroinitializer + %conv7 = zext i8 %chColour to i16 + %.splatinsert.i = insertelement <8 x i16> poison, i16 %conv7, i64 0 + %.splat.i = shufflevector <8 x i16> %.splatinsert.i, <8 x i16> poison, <8 x i32> zeroinitializer + %conv11 = sext i16 %iTargetStride to i32 + br label %for.body + +for.cond.cleanup: ; preds = %do.end, %entry + ret void + +for.body: ; preds = %for.body.lr.ph, %do.end + %pchTarget.addr.030 = phi ptr [ %pchTarget, %for.body.lr.ph ], [ %add.ptr12, %do.end ] + %y.029 = phi i16 [ 0, %for.body.lr.ph ], [ %inc, %do.end ] + br label %do.body + +do.body: ; preds = %do.body, %for.body + %blkCnt.0 = phi i32 [ %conv5, %for.body ], [ %sub8, %do.body ] + %.pn = phi { <8 x i16>, i32 } [ %2, %for.body ], [ %13, %do.body ] + %pchTargetLine.0 = phi ptr [ %pchTarget.addr.030, %for.body ], [ %add.ptr, %do.body ] + %vStride4Offs.0 = extractvalue { <8 x i16>, i32 } %.pn, 0 + %incr.0 = extractvalue { <8 x i16>, i32 } %.pn, 1 + %3 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %blkCnt.0) + %4 = tail call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %pwLineMask, <8 x i16> %vStride4Offs.0, i32 8, i32 0, i32 1, <8 x i1> %3) + %5 = mul <8 x i16> %4, %.splat + %shr = lshr <8 x i16> %5, + %6 = icmp eq <8 x i16> %shr, + %7 = sub 
nuw nsw <8 x i16> , %shr + %sub = select <8 x i1> %6, <8 x i16> zeroinitializer, <8 x i16> %7 + %8 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %pchTargetLine.0, i32 1, <8 x i1> %3, <8 x i8> zeroinitializer) + %9 = zext <8 x i8> %8 to <8 x i16> + %sub.i = sub nsw <8 x i16> , %sub + %10 = mul <8 x i16> %sub.i, %.splat.i + %11 = mul <8 x i16> %sub, %9 + %add.i = add <8 x i16> %10, %11 + %shr.i = lshr <8 x i16> %add.i, + %12 = trunc nuw <8 x i16> %shr.i to <8 x i8> + tail call void @llvm.masked.store.v8i8.p0(<8 x i8> %12, ptr %pchTargetLine.0, i32 1, <8 x i1> %3) + %13 = tail call { <8 x i16>, i32 } @llvm.arm.mve.vidup.v8i16(i32 %incr.0, i32 4) + %add.ptr = getelementptr inbounds i8, ptr %pchTargetLine.0, i32 8 + %sub8 = add nsw i32 %blkCnt.0, -8 + %cmp9 = icmp sgt i32 %blkCnt.0, 8 + br i1 %cmp9, label %do.body, label %do.end + +do.end: ; preds = %do.body + %add.ptr12 = getelementptr inbounds i8, ptr %pchTarget.addr.030, i32 %conv11 + %inc = add nuw nsw i16 %y.029, 1 + %cmp = icmp slt i16 %inc, %0 + br i1 %cmp, label %for.body, label %for.cond.cleanup +} + +; This loop has an else predicate on the vqshl, which is not very realistic but +; prevents us from converting to a vptblock without being able to remove it. 
+define i32 @else(ptr %s1, ptr %s2, i32 %x, ptr %d, i32 %n) { +; CHECK-LABEL: else: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: ldr r2, [sp, #8] +; CHECK-NEXT: cmp r2, #4 +; CHECK-NEXT: mov r3, r2 +; CHECK-NEXT: it ge +; CHECK-NEXT: movge r3, #4 +; CHECK-NEXT: subs r3, r2, r3 +; CHECK-NEXT: add.w r12, r3, #3 +; CHECK-NEXT: movs r3, #1 +; CHECK-NEXT: add.w r12, r3, r12, lsr #2 +; CHECK-NEXT: movs r3, #98 +; CHECK-NEXT: dls lr, r12 +; CHECK-NEXT: .LBB1_1: @ %do.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vctp.32 r2 +; CHECK-NEXT: subs r2, #4 +; CHECK-NEXT: vpstt +; CHECK-NEXT: vldrwt.u32 q1, [r1], #16 +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: vmov q2, q1 +; CHECK-NEXT: vpstet +; CHECK-NEXT: vqdmlsdht.s32 q2, q1, q0 +; CHECK-NEXT: vqshle.u32 q2, r3 +; CHECK-NEXT: vstrwt.32 q2, [r0], #16 +; CHECK-NEXT: le lr, .LBB1_1 +; CHECK-NEXT: @ %bb.2: @ %do.end +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: pop {r7, pc} +entry: + br label %do.body + +do.body: ; preds = %do.body, %entry + %n.addr.0 = phi i32 [ %n, %entry ], [ %sub, %do.body ] + %s2.addr.0 = phi ptr [ %s2, %entry ], [ %add.ptr1, %do.body ] + %s1.addr.0 = phi ptr [ %s1, %entry ], [ %add.ptr, %do.body ] + %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %n.addr.0) + %1 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %s1.addr.0, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %s2.addr.0, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %3 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %2, <4 x i32> %1, i32 0, i32 0, i32 1, <4 x i1> %0) + %4 = xor <4 x i1> %0, + %5 = tail call <4 x i32> @llvm.arm.mve.vshl.scalar.predicated.v4i32.v4i1(<4 x i32> %3, i32 98, i32 1, i32 0, i32 1, <4 x i1> %4) + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %5, ptr %s1.addr.0, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i8, ptr 
%s1.addr.0, i32 16 + %add.ptr1 = getelementptr inbounds i8, ptr %s2.addr.0, i32 16 + %sub = add nsw i32 %n.addr.0, -4 + %cmp = icmp sgt i32 %n.addr.0, 4 + br i1 %cmp, label %do.body, label %do.end + +do.end: ; preds = %do.body + ret i32 0 +} diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll similarity index 100% rename from llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll rename to llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll diff --git a/llvm/test/CodeGen/WebAssembly/eh-option-errors.ll b/llvm/test/CodeGen/WebAssembly/eh-option-errors.ll new file mode 100644 index 0000000000000..52a6364e12258 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/eh-option-errors.ll @@ -0,0 +1,22 @@ +target triple = "wasm32-unknown-unknown" + +; RUN: not --crash llc < %s -enable-emscripten-cxx-exceptions -wasm-enable-eh 2>&1 | FileCheck %s --check-prefix=EM_EH_W_WASM_EH +; EM_EH_W_WASM_EH: LLVM ERROR: -enable-emscripten-cxx-exceptions not allowed with -wasm-enable-eh + +; RUN: not --crash llc < %s -enable-emscripten-sjlj -wasm-enable-sjlj 2>&1 | FileCheck %s --check-prefix=EM_SJLJ_W_WASM_SJLJ +; EM_SJLJ_W_WASM_SJLJ: LLVM ERROR: -enable-emscripten-sjlj not allowed with -wasm-enable-sjlj + +; RUN: not --crash llc < %s -enable-emscripten-cxx-exceptions -wasm-enable-sjlj 2>&1 | FileCheck %s --check-prefix=EM_EH_W_WASM_SJLJ +; EM_EH_W_WASM_SJLJ: LLVM ERROR: -enable-emscripten-cxx-exceptions not allowed with -wasm-enable-sjlj + +; RUN: not --crash llc < %s -wasm-enable-exnref 2>&1 | FileCheck %s --check-prefix=WASM_EXNREF_ONLY +; WASM_EXNREF_ONLY: LLVM ERROR: -wasm-enable-exnref should be used with -wasm-enable-eh + +; RUN: not --crash llc < %s -wasm-enable-eh -exception-model=dwarf 2>&1 | FileCheck %s --check-prefix=EH_MODEL_DWARF +; EH_MODEL_DWARF: LLVM ERROR: -exception-model should be either 'none' or 'wasm' + +; RUN: not --crash llc < %s -enable-emscripten-cxx-exceptions -exception-model=wasm 2>&1 | 
FileCheck %s --check-prefix=EM_EH_W_MODEL_WASM +; EM_EH_W_MODEL_WASM: LLVM ERROR: -exception-model=wasm not allowed with -enable-emscripten-cxx-exceptions + +; RUN: not --crash llc < %s -exception-model=wasm 2>&1 | FileCheck %s --check-prefix=MODEL_WASM_WO_WASM_EH_SJLJ +; MODEL_WASM_WO_WASM_EH_SJLJ: LLVM ERROR: -exception-model=wasm only allowed with at least one of -wasm-enable-eh or -wasm-enable-sjlj diff --git a/llvm/test/CodeGen/WebAssembly/exception.ll b/llvm/test/CodeGen/WebAssembly/exception-legacy.ll similarity index 100% rename from llvm/test/CodeGen/WebAssembly/exception.ll rename to llvm/test/CodeGen/WebAssembly/exception-legacy.ll diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll index d9d3f6be800fd..73ccea8d652db 100644 --- a/llvm/test/CodeGen/WebAssembly/half-precision.ll +++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll @@ -35,3 +35,71 @@ define float @extract_lane_v8f16(<8 x half> %v) { %r = call float @llvm.wasm.extract.lane.f16x8(<8 x half> %v, i32 1) ret float %r } + +; CHECK-LABEL: add_v8f16: +; CHECK: f16x8.add $push0=, $0, $1 +; CHECK-NEXT: return $pop0 +define <8 x half> @add_v8f16(<8 x half> %a, <8 x half> %b) { + %r = fadd <8 x half> %a, %b + ret <8 x half> %r +} + +; CHECK-LABEL: sub_v8f16: +; CHECK: f16x8.sub $push0=, $0, $1 +; CHECK-NEXT: return $pop0 +define <8 x half> @sub_v8f16(<8 x half> %a, <8 x half> %b) { + %r = fsub <8 x half> %a, %b + ret <8 x half> %r +} + +; CHECK-LABEL: mul_v8f16: +; CHECK: f16x8.mul $push0=, $0, $1 +; CHECK-NEXT: return $pop0 +define <8 x half> @mul_v8f16(<8 x half> %a, <8 x half> %b) { + %r = fmul <8 x half> %a, %b + ret <8 x half> %r +} + +; CHECK-LABEL: div_v8f16: +; CHECK: f16x8.div $push0=, $0, $1 +; CHECK-NEXT: return $pop0 +define <8 x half> @div_v8f16(<8 x half> %a, <8 x half> %b) { + %r = fdiv <8 x half> %a, %b + ret <8 x half> %r +} + +; CHECK-LABEL: min_intrinsic_v8f16: +; CHECK: f16x8.min $push0=, $0, $1 +; CHECK-NEXT: return $pop0 
+declare <8 x half> @llvm.minimum.v8f16(<8 x half>, <8 x half>) +define <8 x half> @min_intrinsic_v8f16(<8 x half> %x, <8 x half> %y) { + %a = call <8 x half> @llvm.minimum.v8f16(<8 x half> %x, <8 x half> %y) + ret <8 x half> %a +} + +; CHECK-LABEL: max_intrinsic_v8f16: +; CHECK: f16x8.max $push0=, $0, $1 +; CHECK-NEXT: return $pop0 +declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>) +define <8 x half> @max_intrinsic_v8f16(<8 x half> %x, <8 x half> %y) { + %a = call <8 x half> @llvm.maximum.v8f16(<8 x half> %x, <8 x half> %y) + ret <8 x half> %a +} + +; CHECK-LABEL: pmin_intrinsic_v8f16: +; CHECK: f16x8.pmin $push0=, $0, $1 +; CHECK-NEXT: return $pop0 +declare <8 x half> @llvm.wasm.pmin.v8f16(<8 x half>, <8 x half>) +define <8 x half> @pmin_intrinsic_v8f16(<8 x half> %a, <8 x half> %b) { + %v = call <8 x half> @llvm.wasm.pmin.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %v +} + +; CHECK-LABEL: pmax_intrinsic_v8f16: +; CHECK: f16x8.pmax $push0=, $0, $1 +; CHECK-NEXT: return $pop0 +declare <8 x half> @llvm.wasm.pmax.v8f16(<8 x half>, <8 x half>) +define <8 x half> @pmax_intrinsic_v8f16(<8 x half> %a, <8 x half> %b) { + %v = call <8 x half> @llvm.wasm.pmax.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %v +} diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-options.ll b/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-options.ll index 4a63c812d6ae9..66872a5422986 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-options.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-options.ll @@ -1,7 +1,6 @@ ; RUN: llc < %s -enable-emscripten-cxx-exceptions | FileCheck %s --check-prefix=EH ; RUN: llc < %s -enable-emscripten-sjlj | FileCheck %s --check-prefix=SJLJ ; RUN: llc < %s | FileCheck %s --check-prefix=NONE -; RUN: not --crash llc < %s -enable-emscripten-cxx-exceptions -exception-model=wasm 2>&1 | FileCheck %s --check-prefix=WASM-EH-EM-EH target triple = "wasm32-unknown-unknown" @@ -97,5 +96,3 @@ declare void @free(ptr) 
attributes #0 = { returns_twice } attributes #1 = { noreturn } attributes #2 = { nounwind } - -; WASM-EH-EM-EH: LLVM ERROR: -exception-model=wasm not allowed with -enable-emscripten-cxx-exceptions diff --git a/llvm/test/CodeGen/WebAssembly/reg-argument.mir b/llvm/test/CodeGen/WebAssembly/reg-argument.mir index 23e66dfc71fa1..a549990bdb0a2 100644 --- a/llvm/test/CodeGen/WebAssembly/reg-argument.mir +++ b/llvm/test/CodeGen/WebAssembly/reg-argument.mir @@ -68,3 +68,14 @@ body: | %1:externref = ARGUMENT_externref 0, implicit $arguments RETURN implicit-def $arguments ... +--- +name: argument_exnref +# CHECK-LABEL: argument_exnref +body: | + ; CHECK-LABEL: bb.0: + ; CHECK-NEXT: %1:exnref = ARGUMENT_exnref 0 + bb.0: + %0:i32 = CONST_I32 0, implicit-def $arguments + %1:exnref = ARGUMENT_exnref 0, implicit $arguments + RETURN implicit-def $arguments +... diff --git a/llvm/test/CodeGen/WebAssembly/reg-copy.mir b/llvm/test/CodeGen/WebAssembly/reg-copy.mir index 31a5bfa63a4ea..763fe42d07b61 100644 --- a/llvm/test/CodeGen/WebAssembly/reg-copy.mir +++ b/llvm/test/CodeGen/WebAssembly/reg-copy.mir @@ -77,3 +77,14 @@ body: | %0:externref = COPY %1:externref RETURN implicit-def $arguments ... +--- +name: copy_exnref +# CHECK-LABEL: copy_exnref +body: | + ; CHECK-LABEL: bb.0: + ; CHECK-NEXT: %0:exnref = COPY_EXNREF %1:exnref + ; CHECK-NEXT: RETURN + bb.0: + %0:exnref = COPY %1:exnref + RETURN implicit-def $arguments +... 
diff --git a/llvm/test/CodeGen/X86/apx/add.ll b/llvm/test/CodeGen/X86/apx/add.ll index d3301ecdb72d0..7779ae599f200 100644 --- a/llvm/test/CodeGen/X86/apx/add.ll +++ b/llvm/test/CodeGen/X86/apx/add.ll @@ -1,11 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs --show-mc-encoding | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd,nf -verify-machineinstrs --show-mc-encoding | FileCheck --check-prefix=NF %s define i8 @add8rr(i8 noundef %a, i8 noundef %b) { ; CHECK-LABEL: add8rr: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addb %sil, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x00,0xf7] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add8rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addb %sil, %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x00,0xf7] +; NF-NEXT: retq # encoding: [0xc3] entry: %add = add i8 %a, %b ret i8 %add @@ -17,6 +23,12 @@ define i16 @add16rr(i16 noundef %a, i16 noundef %b) { ; CHECK-NEXT: addl %esi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xf7] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add16rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addl %esi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x01,0xf7] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %add = add i16 %a, %b ret i16 %add @@ -27,6 +39,11 @@ define i32 @add32rr(i32 noundef %a, i32 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addl %esi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xf7] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add32rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addl %esi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x01,0xf7] +; NF-NEXT: retq # encoding: [0xc3] entry: %add = add i32 %a, %b ret i32 %add @@ -37,6 +54,11 @@ define i64 
@add64rr(i64 noundef %a, i64 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addq %rsi, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x01,0xf7] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add64rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addq %rsi, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x01,0xf7] +; NF-NEXT: retq # encoding: [0xc3] entry: %add = add i64 %a, %b ret i64 %add @@ -47,6 +69,11 @@ define i8 @add8rm(i8 noundef %a, ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addb (%rsi), %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x02,0x3e] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add8rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addb (%rsi), %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x02,0x3e] +; NF-NEXT: retq # encoding: [0xc3] entry: %b = load i8, ptr %ptr %add = add i8 %a, %b @@ -58,6 +85,11 @@ define i16 @add16rm(i16 noundef %a, ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addw (%rsi), %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x03,0x3e] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add16rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addw (%rsi), %di, %ax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7d,0x1c,0x03,0x3e] +; NF-NEXT: retq # encoding: [0xc3] entry: %b = load i16, ptr %ptr %add = add i16 %a, %b @@ -69,6 +101,11 @@ define i32 @add32rm(i32 noundef %a, ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addl (%rsi), %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x03,0x3e] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add32rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addl (%rsi), %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x03,0x3e] +; NF-NEXT: retq # encoding: [0xc3] entry: %b = load i32, ptr %ptr %add = add i32 %a, %b @@ -80,6 +117,11 @@ define i64 @add64rm(i64 noundef %a, ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addq (%rsi), %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x03,0x3e] ; CHECK-NEXT: retq # 
encoding: [0xc3] +; +; NF-LABEL: add64rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addq (%rsi), %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x03,0x3e] +; NF-NEXT: retq # encoding: [0xc3] entry: %b = load i64, ptr %ptr %add = add i64 %a, %b @@ -92,6 +134,12 @@ define i16 @add16ri8(i16 noundef %a) { ; CHECK-NEXT: addl $123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xc7,0x7b] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add16ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addl $123, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0xc7,0x7b] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %add = add i16 %a, 123 ret i16 %add @@ -102,6 +150,11 @@ define i32 @add32ri8(i32 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addl $123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xc7,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add32ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addl $123, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0xc7,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %add = add i32 %a, 123 ret i32 %add @@ -112,6 +165,11 @@ define i64 @add64ri8(i64 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addq $123, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xc7,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add64ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addq $123, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x83,0xc7,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %add = add i64 %a, 123 ret i64 %add @@ -122,6 +180,11 @@ define i8 @add8ri(i8 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addb $123, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0xc7,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add8ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addb $123, %dil, %al # EVEX 
TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x80,0xc7,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %add = add i8 %a, 123 ret i8 %add @@ -134,6 +197,13 @@ define i16 @add16ri(i16 noundef %a) { ; CHECK-NEXT: # imm = 0x4D2 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add16ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addl $1234, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xc7,0xd2,0x04,0x00,0x00] +; NF-NEXT: # imm = 0x4D2 +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %add = add i16 %a, 1234 ret i16 %add @@ -145,6 +215,12 @@ define i32 @add32ri(i32 noundef %a) { ; CHECK-NEXT: addl $123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xc7,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add32ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addl $123456, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xc7,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %add = add i32 %a, 123456 ret i32 %add @@ -156,6 +232,12 @@ define i64 @add64ri(i64 noundef %a) { ; CHECK-NEXT: addq $123456, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0xc7,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add64ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addq $123456, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x81,0xc7,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %add = add i64 %a, 123456 ret i64 %add @@ -166,6 +248,11 @@ define i8 @add8mr(ptr %a, i8 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addb %sil, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0x00,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add8mr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addb %sil, (%rdi), %al 
# EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x00,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i8, ptr %a %add = add nsw i8 %t, %b @@ -177,6 +264,11 @@ define i16 @add16mr(ptr %a, i16 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addw %si, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0x01,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add16mr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addw %si, (%rdi), %ax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7d,0x1c,0x01,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %add = add nsw i16 %t, %b @@ -188,6 +280,11 @@ define i32 @add32mr(ptr %a, i32 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addl %esi, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add32mr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addl %esi, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x01,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %add = add nsw i32 %t, %b @@ -199,6 +296,11 @@ define i64 @add64mr(ptr %a, i64 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addq %rsi, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x01,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add64mr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addq %rsi, (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x01,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %add = add nsw i64 %t, %b @@ -212,6 +314,13 @@ define i16 @add16mi8(ptr %a) { ; CHECK-NEXT: addl $123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xc0,0x7b] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add16mi8: +; NF: # %bb.0: # %entry +; NF-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] +; NF-NEXT: addl $123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xc0,0x7b] +; NF-NEXT: # kill: def $ax 
killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %add = add nsw i16 %t, 123 @@ -223,6 +332,11 @@ define i32 @add32mi8(ptr %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addl $123, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0x07,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add32mi8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addl $123, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0x07,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %add = add nsw i32 %t, 123 @@ -234,6 +348,11 @@ define i64 @add64mi8(ptr %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addq $123, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0x07,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add64mi8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addq $123, (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x83,0x07,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %add = add nsw i64 %t, 123 @@ -245,6 +364,11 @@ define i8 @add8mi(ptr %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addb $123, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0x07,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add8mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addb $123, (%rdi), %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x80,0x07,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i8, ptr %a %add = add nsw i8 %t, 123 @@ -259,6 +383,14 @@ define i16 @add16mi(ptr %a) { ; CHECK-NEXT: # imm = 0x4D2 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add16mi: +; NF: # %bb.0: # %entry +; NF-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] +; NF-NEXT: addl $1234, %eax # EVEX TO LEGACY Compression encoding: [0x05,0xd2,0x04,0x00,0x00] +; NF-NEXT: # imm = 0x4D2 +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, 
ptr %a %add = add nsw i16 %t, 1234 @@ -271,6 +403,12 @@ define i32 @add32mi(ptr %a) { ; CHECK-NEXT: addl $123456, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0x07,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add32mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addl $123456, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0x07,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %add = add nsw i32 %t, 123456 @@ -283,6 +421,12 @@ define i64 @add64mi(ptr %a) { ; CHECK-NEXT: addq $123456, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0x07,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add64mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addq $123456, (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x81,0x07,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %add = add nsw i64 %t, 123456 @@ -303,6 +447,15 @@ define i8 @addflag8rr(i8 noundef %a, i8 noundef %b) { ; CHECK-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: addflag8rr: +; NF: # %bb.0: # %entry +; NF-NEXT: addb %sil, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x00,0xf7] +; NF-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] +; NF-NEXT: movl $255, %ecx # encoding: [0xb9,0xff,0x00,0x00,0x00] +; NF-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] +; NF-NEXT: # kill: def $al killed $al killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %add = call i8 @llvm.uadd.sat.i8(i8 %a, i8 %b) ret i8 %add @@ -317,6 +470,15 @@ define i16 @addflag16rr(i16 noundef %a, i16 noundef %b) { ; CHECK-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: 
[0x0f,0x42,0xc1] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: addflag16rr: +; NF: # %bb.0: # %entry +; NF-NEXT: addw %si, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x01,0xf7] +; NF-NEXT: movl $65535, %ecx # encoding: [0xb9,0xff,0xff,0x00,0x00] +; NF-NEXT: # imm = 0xFFFF +; NF-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %add = call i16 @llvm.uadd.sat.i16(i16 %a, i16 %b) ret i16 %add @@ -329,6 +491,13 @@ define i32 @addflag32rr(i32 noundef %a, i32 noundef %b) { ; CHECK-NEXT: movl $-1, %ecx # encoding: [0xb9,0xff,0xff,0xff,0xff] ; CHECK-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: addflag32rr: +; NF: # %bb.0: # %entry +; NF-NEXT: addl %esi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xf7] +; NF-NEXT: movl $-1, %ecx # encoding: [0xb9,0xff,0xff,0xff,0xff] +; NF-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] +; NF-NEXT: retq # encoding: [0xc3] entry: %add = call i32 @llvm.uadd.sat.i32(i32 %a, i32 %b) ret i32 %add @@ -341,6 +510,13 @@ define i64 @addflag64rr(i64 noundef %a, i64 noundef %b) { ; CHECK-NEXT: movq $-1, %rcx # encoding: [0x48,0xc7,0xc1,0xff,0xff,0xff,0xff] ; CHECK-NEXT: cmovbq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x42,0xc1] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: addflag64rr: +; NF: # %bb.0: # %entry +; NF-NEXT: addq %rsi, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x01,0xf7] +; NF-NEXT: movq $-1, %rcx # encoding: [0x48,0xc7,0xc1,0xff,0xff,0xff,0xff] +; NF-NEXT: cmovbq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x42,0xc1] +; NF-NEXT: retq # encoding: [0xc3] entry: %add = call i64 @llvm.uadd.sat.i64(i64 %a, i64 %b) ret i64 %add @@ -355,6 +531,15 @@ define i8 @addflag8rm(i8 noundef %a, ptr 
%b) { ; CHECK-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: addflag8rm: +; NF: # %bb.0: # %entry +; NF-NEXT: addb (%rsi), %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x02,0x3e] +; NF-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] +; NF-NEXT: movl $255, %ecx # encoding: [0xb9,0xff,0x00,0x00,0x00] +; NF-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] +; NF-NEXT: # kill: def $al killed $al killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %t = load i8, ptr %b %add = call i8 @llvm.uadd.sat.i8(i8 %a, i8 %t) @@ -370,6 +555,15 @@ define i16 @addflag16rm(i16 noundef %a, ptr %b) { ; CHECK-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: addflag16rm: +; NF: # %bb.0: # %entry +; NF-NEXT: addw (%rsi), %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x03,0x3e] +; NF-NEXT: movl $65535, %ecx # encoding: [0xb9,0xff,0xff,0x00,0x00] +; NF-NEXT: # imm = 0xFFFF +; NF-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %t = load i16, ptr %b %add = call i16 @llvm.uadd.sat.i16(i16 %a, i16 %t) @@ -383,6 +577,13 @@ define i32 @addflag32rm(i32 noundef %a, ptr %b) { ; CHECK-NEXT: movl $-1, %ecx # encoding: [0xb9,0xff,0xff,0xff,0xff] ; CHECK-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: addflag32rm: +; NF: # %bb.0: # %entry +; NF-NEXT: addl (%rsi), %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x03,0x3e] +; NF-NEXT: movl $-1, %ecx # encoding: [0xb9,0xff,0xff,0xff,0xff] +; NF-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] +; NF-NEXT: retq # 
encoding: [0xc3] entry: %t = load i32, ptr %b %add = call i32 @llvm.uadd.sat.i32(i32 %a, i32 %t) @@ -396,6 +597,13 @@ define i64 @addflag64rm(i64 noundef %a, ptr %b) { ; CHECK-NEXT: movq $-1, %rcx # encoding: [0x48,0xc7,0xc1,0xff,0xff,0xff,0xff] ; CHECK-NEXT: cmovbq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x42,0xc1] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: addflag64rm: +; NF: # %bb.0: # %entry +; NF-NEXT: addq (%rsi), %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x03,0x3e] +; NF-NEXT: movq $-1, %rcx # encoding: [0x48,0xc7,0xc1,0xff,0xff,0xff,0xff] +; NF-NEXT: cmovbq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x42,0xc1] +; NF-NEXT: retq # encoding: [0xc3] entry: %t = load i64, ptr %b %add = call i64 @llvm.uadd.sat.i64(i64 %a, i64 %t) @@ -411,6 +619,15 @@ define i16 @addflag16ri8(i16 noundef %a) { ; CHECK-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: addflag16ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: addw $123, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0xc7,0x7b] +; NF-NEXT: movl $65535, %ecx # encoding: [0xb9,0xff,0xff,0x00,0x00] +; NF-NEXT: # imm = 0xFFFF +; NF-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %add = call i16 @llvm.uadd.sat.i16(i16 %a, i16 123) ret i16 %add @@ -423,6 +640,13 @@ define i32 @addflag32ri8(i32 noundef %a) { ; CHECK-NEXT: movl $-1, %ecx # encoding: [0xb9,0xff,0xff,0xff,0xff] ; CHECK-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: addflag32ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: addl $123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xc7,0x7b] +; NF-NEXT: movl $-1, %ecx # encoding: [0xb9,0xff,0xff,0xff,0xff] +; NF-NEXT: cmovbl 
%ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] +; NF-NEXT: retq # encoding: [0xc3] entry: %add = call i32 @llvm.uadd.sat.i32(i32 %a, i32 123) ret i32 %add @@ -435,6 +659,13 @@ define i64 @addflag64ri8(i64 noundef %a) { ; CHECK-NEXT: movq $-1, %rcx # encoding: [0x48,0xc7,0xc1,0xff,0xff,0xff,0xff] ; CHECK-NEXT: cmovbq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x42,0xc1] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: addflag64ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: addq $123, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xc7,0x7b] +; NF-NEXT: movq $-1, %rcx # encoding: [0x48,0xc7,0xc1,0xff,0xff,0xff,0xff] +; NF-NEXT: cmovbq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x42,0xc1] +; NF-NEXT: retq # encoding: [0xc3] entry: %add = call i64 @llvm.uadd.sat.i64(i64 %a, i64 123) ret i64 %add @@ -449,6 +680,15 @@ define i8 @addflag8ri(i8 noundef %a) { ; CHECK-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: addflag8ri: +; NF: # %bb.0: # %entry +; NF-NEXT: addb $123, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0xc7,0x7b] +; NF-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] +; NF-NEXT: movl $255, %ecx # encoding: [0xb9,0xff,0x00,0x00,0x00] +; NF-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] +; NF-NEXT: # kill: def $al killed $al killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %add = call i8 @llvm.uadd.sat.i8(i8 %a, i8 123) ret i8 %add @@ -464,6 +704,16 @@ define i16 @addflag16ri(i16 noundef %a) { ; CHECK-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: addflag16ri: +; NF: # %bb.0: # %entry +; NF-NEXT: addw $1234, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x81,0xc7,0xd2,0x04] +; NF-NEXT: # 
imm = 0x4D2 +; NF-NEXT: movl $65535, %ecx # encoding: [0xb9,0xff,0xff,0x00,0x00] +; NF-NEXT: # imm = 0xFFFF +; NF-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %add = call i16 @llvm.uadd.sat.i16(i16 %a, i16 1234) ret i16 %add @@ -477,6 +727,14 @@ define i32 @addflag32ri(i32 noundef %a) { ; CHECK-NEXT: movl $-1, %ecx # encoding: [0xb9,0xff,0xff,0xff,0xff] ; CHECK-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: addflag32ri: +; NF: # %bb.0: # %entry +; NF-NEXT: addl $123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xc7,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: movl $-1, %ecx # encoding: [0xb9,0xff,0xff,0xff,0xff] +; NF-NEXT: cmovbl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x42,0xc1] +; NF-NEXT: retq # encoding: [0xc3] entry: %add = call i32 @llvm.uadd.sat.i32(i32 %a, i32 123456) ret i32 %add @@ -490,6 +748,14 @@ define i64 @addflag64ri(i64 noundef %a) { ; CHECK-NEXT: movq $-1, %rcx # encoding: [0x48,0xc7,0xc1,0xff,0xff,0xff,0xff] ; CHECK-NEXT: cmovbq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x42,0xc1] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: addflag64ri: +; NF: # %bb.0: # %entry +; NF-NEXT: addq $123456, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0xc7,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: movq $-1, %rcx # encoding: [0x48,0xc7,0xc1,0xff,0xff,0xff,0xff] +; NF-NEXT: cmovbq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x42,0xc1] +; NF-NEXT: retq # encoding: [0xc3] entry: %add = call i64 @llvm.uadd.sat.i64(i64 %a, i64 123456) ret i64 %add @@ -507,6 +773,16 @@ define i1 @add64ri_reloc(i16 %k) { ; CHECK-NEXT: # fixup A - offset: 2, value: val, kind: reloc_signed_4byte ; CHECK-NEXT: setne %al # encoding: [0x0f,0x95,0xc0] ; CHECK-NEXT: retq # 
encoding: [0xc3] +; +; NF-LABEL: add64ri_reloc: +; NF: # %bb.0: +; NF-NEXT: # kill: def $edi killed $edi def $rdi +; NF-NEXT: movswq %di, %rax # encoding: [0x48,0x0f,0xbf,0xc7] +; NF-NEXT: addq %rax, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x01,0xc0] +; NF-NEXT: addq $val, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x05,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: val, kind: reloc_signed_4byte +; NF-NEXT: setne %al # encoding: [0x0f,0x95,0xc0] +; NF-NEXT: retq # encoding: [0xc3] %g = getelementptr inbounds i16, ptr @val, i16 %k %cmp = icmp ne ptr %g, null ret i1 %cmp @@ -517,6 +793,11 @@ define void @add8mr_legacy(ptr %a, i8 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addb %sil, (%rdi) # encoding: [0x40,0x00,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add8mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: addb %sil, (%rdi) # encoding: [0x40,0x00,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i8, ptr %a %add = add i8 %t, %b @@ -529,6 +810,11 @@ define void @add16mr_legacy(ptr %a, i16 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addw %si, (%rdi) # encoding: [0x66,0x01,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add16mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: addw %si, (%rdi) # encoding: [0x66,0x01,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %add = add i16 %t, %b @@ -541,6 +827,11 @@ define void @add32mr_legacy(ptr %a, i32 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addl %esi, (%rdi) # encoding: [0x01,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add32mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: addl %esi, (%rdi) # encoding: [0x01,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %add = add i32 %t, %b @@ -553,6 +844,11 @@ define void @add64mr_legacy(ptr %a, i64 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addq %rsi, (%rdi) # encoding: [0x48,0x01,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; 
NF-LABEL: add64mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: addq %rsi, (%rdi) # encoding: [0x48,0x01,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %add = add i64 %t, %b @@ -565,6 +861,11 @@ define void @add8mi_legacy(ptr %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addb $123, (%rdi) # encoding: [0x80,0x07,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add8mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: addb $123, (%rdi) # encoding: [0x80,0x07,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i8, ptr %a %add = add nsw i8 %t, 123 @@ -578,6 +879,12 @@ define void @add16mi_legacy(ptr %a) { ; CHECK-NEXT: addw $1234, (%rdi) # encoding: [0x66,0x81,0x07,0xd2,0x04] ; CHECK-NEXT: # imm = 0x4D2 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add16mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: addw $1234, (%rdi) # encoding: [0x66,0x81,0x07,0xd2,0x04] +; NF-NEXT: # imm = 0x4D2 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %add = add nsw i16 %t, 1234 @@ -591,6 +898,12 @@ define void @add32mi_legacy(ptr %a) { ; CHECK-NEXT: addl $123456, (%rdi) # encoding: [0x81,0x07,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add32mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: addl $123456, (%rdi) # encoding: [0x81,0x07,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %add = add nsw i32 %t, 123456 @@ -604,6 +917,12 @@ define void @add64mi_legacy(ptr %a) { ; CHECK-NEXT: addq $123456, (%rdi) # encoding: [0x48,0x81,0x07,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: add64mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: addq $123456, (%rdi) # encoding: [0x48,0x81,0x07,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %add = add nsw i64 %t, 123456 diff --git 
a/llvm/test/CodeGen/X86/apx/and.ll b/llvm/test/CodeGen/X86/apx/and.ll index af8f4119ac054..58f54fbe50a52 100644 --- a/llvm/test/CodeGen/X86/apx/and.ll +++ b/llvm/test/CodeGen/X86/apx/and.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs --show-mc-encoding | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd,nf -verify-machineinstrs --show-mc-encoding | FileCheck --check-prefix=NF %s define i8 @and8rr(i8 noundef %a, i8 noundef %b) { ; CHECK-LABEL: and8rr: @@ -7,6 +8,12 @@ define i8 @and8rr(i8 noundef %a, i8 noundef %b) { ; CHECK-NEXT: andl %esi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x21,0xf7] ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and8rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andl %esi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x21,0xf7] +; NF-NEXT: # kill: def $al killed $al killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %and = and i8 %a, %b ret i8 %and @@ -18,6 +25,12 @@ define i16 @and16rr(i16 noundef %a, i16 noundef %b) { ; CHECK-NEXT: andl %esi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x21,0xf7] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and16rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andl %esi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x21,0xf7] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %and = and i16 %a, %b ret i16 %and @@ -28,6 +41,11 @@ define i32 @and32rr(i32 noundef %a, i32 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andl %esi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x21,0xf7] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and32rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andl %esi, %edi, %eax # EVEX TO EVEX Compression encoding: 
[0x62,0xf4,0x7c,0x1c,0x21,0xf7] +; NF-NEXT: retq # encoding: [0xc3] entry: %and = and i32 %a, %b ret i32 %and @@ -38,6 +56,11 @@ define i64 @and64rr(i64 noundef %a, i64 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andq %rsi, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x21,0xf7] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and64rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andq %rsi, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x21,0xf7] +; NF-NEXT: retq # encoding: [0xc3] entry: %and = and i64 %a, %b ret i64 %and @@ -48,6 +71,11 @@ define i8 @and8rm(i8 noundef %a, ptr %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andb (%rsi), %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x22,0x3e] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and8rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andb (%rsi), %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x22,0x3e] +; NF-NEXT: retq # encoding: [0xc3] entry: %t = load i8, ptr %b %and = and i8 %a, %t @@ -59,6 +87,11 @@ define i16 @and16rm(i16 noundef %a, ptr %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andw (%rsi), %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x23,0x3e] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and16rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andw (%rsi), %di, %ax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7d,0x1c,0x23,0x3e] +; NF-NEXT: retq # encoding: [0xc3] entry: %t = load i16, ptr %b %and = and i16 %a, %t @@ -70,6 +103,11 @@ define i32 @and32rm(i32 noundef %a, ptr %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andl (%rsi), %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x23,0x3e] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and32rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andl (%rsi), %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x23,0x3e] +; NF-NEXT: retq # encoding: [0xc3] entry: %t = load i32, ptr %b %and = and i32 %a, %t @@ -81,6 +119,11 @@ define i64 @and64rm(i64 noundef %a, ptr %b) { ; 
CHECK: # %bb.0: # %entry ; CHECK-NEXT: andq (%rsi), %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x23,0x3e] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and64rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andq (%rsi), %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x23,0x3e] +; NF-NEXT: retq # encoding: [0xc3] entry: %t = load i64, ptr %b %and = and i64 %a, %t @@ -93,6 +136,12 @@ define i16 @and16ri8(i16 noundef %a) { ; CHECK-NEXT: andl $123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xe7,0x7b] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and16ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andl $123, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0xe7,0x7b] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %and = and i16 %a, 123 ret i16 %and @@ -103,6 +152,11 @@ define i32 @and32ri8(i32 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andl $123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xe7,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and32ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andl $123, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0xe7,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %and = and i32 %a, 123 ret i32 %and @@ -113,6 +167,11 @@ define i64 @and64ri8(i64 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andl $123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xe7,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and64ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andl $123, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0xe7,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %and = and i64 %a, 123 ret i64 %and @@ -123,6 +182,11 @@ define i8 @and8ri(i8 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andb $123, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0xe7,0x7b] ; 
CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and8ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andb $123, %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x80,0xe7,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %and = and i8 %a, 123 ret i8 %and @@ -135,6 +199,13 @@ define i16 @and16ri(i16 noundef %a) { ; CHECK-NEXT: # imm = 0x4D2 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and16ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andl $1234, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xe7,0xd2,0x04,0x00,0x00] +; NF-NEXT: # imm = 0x4D2 +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %and = and i16 %a, 1234 ret i16 %and @@ -146,6 +217,12 @@ define i32 @and32ri(i32 noundef %a) { ; CHECK-NEXT: andl $123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xe7,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and32ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andl $123456, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xe7,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %and = and i32 %a, 123456 ret i32 %and @@ -157,6 +234,12 @@ define i64 @and64ri(i64 noundef %a) { ; CHECK-NEXT: andl $123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xe7,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and64ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andl $123456, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xe7,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %and = and i64 %a, 123456 ret i64 %and @@ -167,6 +250,11 @@ define i8 @and8mr(ptr %a, i8 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andb %sil, (%rdi), %al # encoding: 
[0x62,0xf4,0x7c,0x18,0x20,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and8mr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andb %sil, (%rdi), %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x20,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i8, ptr %a %and = and i8 %t, %b @@ -178,6 +266,11 @@ define i16 @and16mr(ptr %a, i16 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andw %si, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0x21,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and16mr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andw %si, (%rdi), %ax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7d,0x1c,0x21,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %and = and i16 %t, %b @@ -189,6 +282,11 @@ define i32 @and32mr(ptr %a, i32 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andl %esi, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x21,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and32mr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andl %esi, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x21,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %and = and i32 %t, %b @@ -200,6 +298,11 @@ define i64 @and64mr(ptr %a, i64 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andq %rsi, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x21,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and64mr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andq %rsi, (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x21,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %and = and i64 %t, %b @@ -213,6 +316,13 @@ define i16 @and16mi8(ptr %a) { ; CHECK-NEXT: andl $123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xe0,0x7b] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and16mi8: +; NF: # %bb.0: # %entry +; NF-NEXT: movzwl (%rdi), %eax 
# encoding: [0x0f,0xb7,0x07] +; NF-NEXT: andl $123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xe0,0x7b] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %and = and i16 %t, 123 @@ -224,6 +334,11 @@ define i32 @and32mi8(ptr %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andl $123, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0x27,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and32mi8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andl $123, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0x27,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %and = and i32 %t, 123 @@ -236,6 +351,12 @@ define i64 @and64mi8(ptr %a) { ; CHECK-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07] ; CHECK-NEXT: andl $123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xe0,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and64mi8: +; NF: # %bb.0: # %entry +; NF-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07] +; NF-NEXT: andl $123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xe0,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %and = and i64 %t, 123 @@ -247,6 +368,11 @@ define i8 @and8mi(ptr %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andb $123, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0x27,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and8mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andb $123, (%rdi), %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x80,0x27,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i8, ptr %a %and = and i8 %t, 123 @@ -261,6 +387,14 @@ define i16 @and16mi(ptr %a) { ; CHECK-NEXT: # imm = 0x4D2 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and16mi: +; NF: # %bb.0: # %entry +; NF-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] +; NF-NEXT: andl $1234, %eax # EVEX TO 
LEGACY Compression encoding: [0x25,0xd2,0x04,0x00,0x00] +; NF-NEXT: # imm = 0x4D2 +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %and = and i16 %t, 1234 @@ -273,6 +407,12 @@ define i32 @and32mi(ptr %a) { ; CHECK-NEXT: andl $123456, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0x27,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and32mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} andl $123456, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0x27,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %and = and i32 %t, 123456 @@ -286,6 +426,13 @@ define i64 @and64mi(ptr %a) { ; CHECK-NEXT: andl $123456, %eax # EVEX TO LEGACY Compression encoding: [0x25,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and64mi: +; NF: # %bb.0: # %entry +; NF-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07] +; NF-NEXT: andl $123456, %eax # EVEX TO LEGACY Compression encoding: [0x25,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %and = and i64 %t, 123456 @@ -303,6 +450,15 @@ define i1 @andflag8rr(i8 %a, i8 %b) { ; CHECK-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: andflag8rr: +; NF: # %bb.0: +; NF-NEXT: notb %sil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xf6,0xd6] +; NF-NEXT: andb %al, %dil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x20,0xc7] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %xor = xor i8 %b, -1 %v0 = and i8 %a, %xor ; 0xff << 50 
%v1 = icmp eq i8 %v0, 0 @@ -319,6 +475,15 @@ define i1 @andflag16rr(i16 %a, i16 %b) { ; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: andflag16rr: +; NF: # %bb.0: +; NF-NEXT: notl %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xf7,0xd6] +; NF-NEXT: andw %ax, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x21,0xc7] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %xor = xor i16 %b, -1 %v0 = and i16 %a, %xor ; 0xff << 50 %v1 = icmp eq i16 %v0, 0 @@ -334,6 +499,14 @@ define i1 @andflag32rr(i32 %a, i32 %b) { ; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: andflag32rr: +; NF: # %bb.0: +; NF-NEXT: andl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x21,0xf7] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %v0 = and i32 %a, %b ; 0xff << 50 %v1 = icmp eq i32 %v0, 0 store i32 %v0, ptr @d64 @@ -348,6 +521,14 @@ define i1 @andflag64rr(i64 %a, i64 %b) { ; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: andflag64rr: +; NF: # %bb.0: +; NF-NEXT: andq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x21,0xf7] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: 
reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %v0 = and i64 %a, %b ; 0xff << 50 %v1 = icmp eq i64 %v0, 0 store i64 %v0, ptr @d64 @@ -363,6 +544,15 @@ define i1 @andflag8rm(ptr %ptr, i8 %b) { ; CHECK-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: andflag8rm: +; NF: # %bb.0: +; NF-NEXT: notb %sil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xf6,0xd6] +; NF-NEXT: andb (%rdi), %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0x22,0x07] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %a = load i8, ptr %ptr %xor = xor i8 %b, -1 %v0 = and i8 %a, %xor ; 0xff << 50 @@ -380,6 +570,15 @@ define i1 @andflag16rm(ptr %ptr, i16 %b) { ; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: andflag16rm: +; NF: # %bb.0: +; NF-NEXT: notl %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xf7,0xd6] +; NF-NEXT: andw (%rdi), %ax, %cx # encoding: [0x62,0xf4,0x75,0x18,0x23,0x07] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %a = load i16, ptr %ptr %xor = xor i16 %b, -1 %v0 = and i16 %a, %xor ; 0xff << 50 @@ -396,6 +595,14 @@ define i1 @andflag32rm(ptr %ptr, i32 %b) { ; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: andflag32rm: +; NF: # %bb.0: +; NF-NEXT: andl (%rdi), %esi, %ecx # encoding: 
[0x62,0xf4,0x74,0x18,0x23,0x37] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %a = load i32, ptr %ptr %v0 = and i32 %a, %b ; 0xff << 50 %v1 = icmp eq i32 %v0, 0 @@ -411,6 +618,14 @@ define i1 @andflag64rm(ptr %ptr, i64 %b) { ; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: andflag64rm: +; NF: # %bb.0: +; NF-NEXT: andq (%rdi), %rsi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x23,0x37] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %a = load i64, ptr %ptr %v0 = and i64 %a, %b ; 0xff << 50 %v1 = icmp eq i64 %v0, 0 @@ -426,6 +641,14 @@ define i1 @andflag8ri(i8 %a) { ; CHECK-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: andflag8ri: +; NF: # %bb.0: +; NF-NEXT: andb $-124, %dil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xe7,0x84] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %xor = xor i8 123, -1 %v0 = and i8 %a, %xor ; 0xff << 50 %v1 = icmp eq i8 %v0, 0 @@ -442,6 +665,15 @@ define i1 @andflag16ri(i16 %a) { ; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: andflag16ri: +; NF: # %bb.0: +; NF-NEXT: andw 
$-1235, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x81,0xe7,0x2d,0xfb] +; NF-NEXT: # imm = 0xFB2D +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %xor = xor i16 1234, -1 %v0 = and i16 %a, %xor ; 0xff << 50 %v1 = icmp eq i16 %v0, 0 @@ -458,6 +690,15 @@ define i1 @andflag32ri(i32 %a) { ; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: andflag32ri: +; NF: # %bb.0: +; NF-NEXT: andl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xe7,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %v0 = and i32 %a, 123456 ; 0xff << 50 %v1 = icmp eq i32 %v0, 0 store i32 %v0, ptr @d64 @@ -473,6 +714,15 @@ define i1 @andflag64ri(i64 %a) { ; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: andflag64ri: +; NF: # %bb.0: +; NF-NEXT: andq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xe7,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %v0 = and i64 %a, 123456 ; 0xff << 50 %v1 = icmp eq i64 %v0, 0 store i64 %v0, ptr @d64 @@ -487,6 +737,14 @@ define i1 @andflag16ri8(i16 %a) { ; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] ; 
CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: andflag16ri8: +; NF: # %bb.0: +; NF-NEXT: andw $-124, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xe7,0x84] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %xor = xor i16 123, -1 %v0 = and i16 %a, %xor ; 0xff << 50 %v1 = icmp eq i16 %v0, 0 @@ -502,6 +760,14 @@ define i1 @andflag32ri8(i32 %a) { ; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: andflag32ri8: +; NF: # %bb.0: +; NF-NEXT: andl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xe7,0x7b] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %v0 = and i32 %a, 123 ; 0xff << 50 %v1 = icmp eq i32 %v0, 0 store i32 %v0, ptr @d64 @@ -516,6 +782,14 @@ define i1 @andflag64ri8(i64 %a) { ; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: andflag64ri8: +; NF: # %bb.0: +; NF-NEXT: andq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xe7,0x7b] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %v0 = and i64 %a, 123 ; 0xff << 50 %v1 = icmp eq i64 %v0, 0 store i64 %v0, ptr @d64 @@ -527,6 +801,11 @@ define void @and8mr_legacy(ptr %a, i8 noundef %b) { ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: andb %sil, (%rdi) # encoding: [0x40,0x20,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and8mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: andb %sil, (%rdi) # encoding: [0x40,0x20,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i8, ptr %a %and = and i8 %t, %b @@ -539,6 +818,11 @@ define void @and16mr_legacy(ptr %a, i16 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andw %si, (%rdi) # encoding: [0x66,0x21,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and16mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: andw %si, (%rdi) # encoding: [0x66,0x21,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %and = and i16 %t, %b @@ -551,6 +835,11 @@ define void @and32mr_legacy(ptr %a, i32 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andl %esi, (%rdi) # encoding: [0x21,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and32mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: andl %esi, (%rdi) # encoding: [0x21,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %and = and i32 %t, %b @@ -563,6 +852,11 @@ define void @and64mr_legacy(ptr %a, i64 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andq %rsi, (%rdi) # encoding: [0x48,0x21,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and64mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: andq %rsi, (%rdi) # encoding: [0x48,0x21,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %and = and i64 %t, %b @@ -575,6 +869,11 @@ define void @and8mi_legacy(ptr %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andb $123, (%rdi) # encoding: [0x80,0x27,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and8mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: andb $123, (%rdi) # encoding: [0x80,0x27,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i8, ptr %a %and = and i8 %t, 123 @@ -588,6 +887,12 @@ define void @and16mi_legacy(ptr %a) { ; CHECK-NEXT: andw $1234, (%rdi) # encoding: 
[0x66,0x81,0x27,0xd2,0x04] ; CHECK-NEXT: # imm = 0x4D2 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and16mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: andw $1234, (%rdi) # encoding: [0x66,0x81,0x27,0xd2,0x04] +; NF-NEXT: # imm = 0x4D2 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %and = and i16 %t, 1234 @@ -601,6 +906,12 @@ define void @and32mi_legacy(ptr %a) { ; CHECK-NEXT: andl $123456, (%rdi) # encoding: [0x81,0x27,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and32mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: andl $123456, (%rdi) # encoding: [0x81,0x27,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %and = and i32 %t, 123456 @@ -614,6 +925,12 @@ define void @and64mi_legacy(ptr %a) { ; CHECK-NEXT: andq $123456, (%rdi) # encoding: [0x48,0x81,0x27,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: and64mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: andq $123456, (%rdi) # encoding: [0x48,0x81,0x27,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %and = and i64 %t, 123456 diff --git a/llvm/test/CodeGen/X86/apx/compress-evex.mir b/llvm/test/CodeGen/X86/apx/compress-evex.mir index d8bef886e234f..5a59ab0f8a9d0 100644 --- a/llvm/test/CodeGen/X86/apx/compress-evex.mir +++ b/llvm/test/CodeGen/X86/apx/compress-evex.mir @@ -1,4 +1,5 @@ -# RUN: llc %s -mtriple=x86_64-unknown -mattr=+ndd,+egpr -start-before=x86-compress-evex -show-mc-encoding -o - | FileCheck %s +# RUN: llc %s -mtriple=x86_64-unknown -mattr=+ndd,+egpr -start-before=x86-compress-evex -show-mc-encoding -o - | FileCheck --check-prefixes=CHECK,NDD %s +# RUN: llc %s -mtriple=x86_64-unknown -mattr=+ndd,+egpr,+nf -start-before=x86-compress-evex -show-mc-encoding -o - | FileCheck --check-prefixes=CHECK,NDD-NF %s ... 
--- @@ -46,7 +47,8 @@ name: ndd_2_non_ndd_incommutable body: | bb.0.entry: liveins: $rdi, $rsi - ; CHECK: subq %rax, %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x29,0xc6] + ; NDD: subq %rax, %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x29,0xc6] + ; NDD-NF: {nf} subq %rax, %rsi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x29,0xc6] renamable $rax = ADD64rr_ND killed renamable $rdi, renamable $rsi, implicit-def dead $eflags renamable $rax = SUB64rr_ND killed renamable $rsi, killed renamable $rax, implicit-def dead $eflags RET64 $rax @@ -55,7 +57,8 @@ body: | name: ndd_2_non_ndd_mem body: | bb.0.entry: - ; CHECK: addq $123456, (%rax), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0x00,0x40,0xe2,0x01,0x00] + ; NDD: addq $123456, (%rax), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0x00,0x40,0xe2,0x01,0x00] + ; NDD-NF: {nf} addq $123456, (%rax), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x81,0x00,0x40,0xe2,0x01,0x00] renamable $rax = MOV64rm $noreg, 1, $noreg, 0, $fs renamable $rax = nsw ADD64mi32_ND killed renamable $rax, 1, $noreg, 0, $noreg, 123456, implicit-def dead $eflags RET64 $rax @@ -88,5 +91,39 @@ body: | ; CHECK: bswapq %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0xc8] renamable $rax = MOVBE64rr killed renamable $rax RET64 killed $rax - +... 
+--- +name: non_nf_2_nf +body: | + bb.0.entry: + liveins: $rdi, $r16 + ; CHECK: addq %r16, %rdi # encoding: [0xd5,0x48,0x01,0xc7] + ; NDD: xorq %r16, %rdi, %rax # encoding: [0x62,0xe4,0xfc,0x18,0x31,0xc7] + ; NDD-NF: {nf} xorq %r16, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xe4,0xfc,0x1c,0x31,0xc7] + ; CHECK: addq %r16, %rax, %rdi # encoding: [0x62,0xe4,0xc4,0x18,0x01,0xc0] + ; CHECK: adcq %rdi, %r16, %rax # encoding: [0x62,0xfc,0xfc,0x18,0x11,0xf8] + $rdi = ADD64rr $rdi, $r16, implicit-def dead $eflags + $rax = XOR64rr_ND $rdi, $r16, implicit-def dead $eflags + $rdi = ADD64rr_ND $rax, $r16, implicit-def $eflags + $rax = ADC64rr_ND $r16, $rdi, implicit-def dead $eflags, implicit $eflags + RET64 $rax +... +--- +name: cfcmov_no_convert +body: | + bb.0.entry: + liveins: $eflags, $rax, $rbx + ; CHECK: cfcmovew %bx, %ax, %ax # encoding: [0x62,0xf4,0x7d,0x1c,0x44,0xc3] + ; CHECK: cfcmovsw 24(%rax), %bx, %bx # encoding: [0x62,0xf4,0x65,0x1c,0x48,0x58,0x18] + ; CHECK: cfcmovel %ebx, %eax, %eax # encoding: [0x62,0xf4,0x7c,0x1c,0x44,0xc3] + ; CHECK: cfcmovsl 24(%rax), %ebx, %ebx # encoding: [0x62,0xf4,0x64,0x1c,0x48,0x58,0x18] + ; CHECK: cfcmoveq %rbx, %rax, %rax # encoding: [0x62,0xf4,0xfc,0x1c,0x44,0xc3] + ; CHECK: cfcmovsq 24(%rax), %rbx, %rbx # encoding: [0x62,0xf4,0xe4,0x1c,0x48,0x58,0x18] + $ax = CFCMOV16rr_ND $ax, $bx, 4, implicit $eflags + $bx = CFCMOV16rm_ND $bx, $rax, 1, $noreg, 24, $noreg, 8, implicit $eflags + $eax = CFCMOV32rr_ND $eax, $ebx, 4, implicit $eflags + $ebx = CFCMOV32rm_ND $ebx, $rax, 1, $noreg, 24, $noreg, 8, implicit $eflags + $rax = CFCMOV64rr_ND $rax, $rbx, 4, implicit $eflags + $rbx = CFCMOV64rm_ND $rbx, $rax, 1, $noreg, 24, $noreg, 8, implicit $eflags + RET64 $rax ... 
diff --git a/llvm/test/CodeGen/X86/apx/dec.ll b/llvm/test/CodeGen/X86/apx/dec.ll index fcb2cae3b5cad..a18ed2ace603a 100644 --- a/llvm/test/CodeGen/X86/apx/dec.ll +++ b/llvm/test/CodeGen/X86/apx/dec.ll @@ -1,11 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd,nf -verify-machineinstrs | FileCheck --check-prefix=NF %s define i8 @dec8r(i8 noundef %a) { ; CHECK-LABEL: dec8r: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: decb %dil, %al ; CHECK-NEXT: retq +; +; NF-LABEL: dec8r: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} decb %dil, %al +; NF-NEXT: retq entry: %dec = sub i8 %a, 1 ret i8 %dec @@ -17,6 +23,12 @@ define i16 @dec16r(i16 noundef %a) { ; CHECK-NEXT: decl %edi, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq +; +; NF-LABEL: dec16r: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} decl %edi, %eax +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq entry: %dec = sub i16 %a, 1 ret i16 %dec @@ -27,6 +39,11 @@ define i32 @dec32r(i32 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: decl %edi, %eax ; CHECK-NEXT: retq +; +; NF-LABEL: dec32r: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} decl %edi, %eax +; NF-NEXT: retq entry: %dec = sub i32 %a, 1 ret i32 %dec @@ -37,6 +54,11 @@ define i64 @dec64r(i64 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: decq %rdi, %rax ; CHECK-NEXT: retq +; +; NF-LABEL: dec64r: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} decq %rdi, %rax +; NF-NEXT: retq entry: %dec = sub i64 %a, 1 ret i64 %dec @@ -47,6 +69,11 @@ define i8 @dec8m(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: decb (%rdi), %al ; CHECK-NEXT: retq +; +; NF-LABEL: dec8m: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} decb (%rdi), %al +; NF-NEXT: retq entry: %a = load i8, ptr %ptr %dec = sub i8 %a, 1 @@ -60,6 +87,13 @@ define i16 @dec16m(ptr 
%ptr) { ; CHECK-NEXT: decl %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq +; +; NF-LABEL: dec16m: +; NF: # %bb.0: # %entry +; NF-NEXT: movzwl (%rdi), %eax +; NF-NEXT: decl %eax +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq entry: %a = load i16, ptr %ptr %dec = sub i16 %a, 1 @@ -71,6 +105,11 @@ define i32 @dec32m(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: decl (%rdi), %eax ; CHECK-NEXT: retq +; +; NF-LABEL: dec32m: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} decl (%rdi), %eax +; NF-NEXT: retq entry: %a = load i32, ptr %ptr %dec = sub i32 %a, 1 @@ -82,6 +121,11 @@ define i64 @dec64m(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: decq (%rdi), %rax ; CHECK-NEXT: retq +; +; NF-LABEL: dec64m: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} decq (%rdi), %rax +; NF-NEXT: retq entry: %a = load i64, ptr %ptr %dec = sub i64 %a, 1 @@ -93,6 +137,11 @@ define void @dec8m_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: decb (%rdi) ; CHECK-NEXT: retq +; +; NF-LABEL: dec8m_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: decb (%rdi) +; NF-NEXT: retq entry: %a = load i8, ptr %ptr %dec = sub i8 %a, 1 @@ -105,6 +154,11 @@ define void @dec16m_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: decw (%rdi) ; CHECK-NEXT: retq +; +; NF-LABEL: dec16m_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: decw (%rdi) +; NF-NEXT: retq entry: %a = load i16, ptr %ptr %dec = sub i16 %a, 1 @@ -117,6 +171,11 @@ define void @dec32m_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: decl (%rdi) ; CHECK-NEXT: retq +; +; NF-LABEL: dec32m_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: decl (%rdi) +; NF-NEXT: retq entry: %a = load i32, ptr %ptr %dec = sub i32 %a, 1 @@ -129,6 +188,11 @@ define void @dec64m_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: decq (%rdi) ; CHECK-NEXT: retq +; +; NF-LABEL: dec64m_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: decq (%rdi) +; NF-NEXT: retq entry: %a = load i64, ptr %ptr %dec = sub 
i64 %a, 1 diff --git a/llvm/test/CodeGen/X86/apx/imul.ll b/llvm/test/CodeGen/X86/apx/imul.ll index 2963a6477be4c..d97b2c0baec5e 100644 --- a/llvm/test/CodeGen/X86/apx/imul.ll +++ b/llvm/test/CodeGen/X86/apx/imul.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd,nf -verify-machineinstrs | FileCheck --check-prefix=NF %s define i16 @mul16rr(i16 noundef %a, i16 noundef %b) { ; CHECK-LABEL: mul16rr: @@ -7,6 +8,12 @@ define i16 @mul16rr(i16 noundef %a, i16 noundef %b) { ; CHECK-NEXT: imull %esi, %edi, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq +; +; NF-LABEL: mul16rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} imull %esi, %edi, %eax +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq entry: %mul = mul i16 %a, %b ret i16 %mul @@ -17,6 +24,11 @@ define i32 @mul32rr(i32 noundef %a, i32 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: imull %esi, %edi, %eax ; CHECK-NEXT: retq +; +; NF-LABEL: mul32rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} imull %esi, %edi, %eax +; NF-NEXT: retq entry: %mul = mul i32 %a, %b ret i32 %mul @@ -27,6 +39,11 @@ define i64 @mul64rr(i64 noundef %a, i64 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: imulq %rsi, %rdi, %rax ; CHECK-NEXT: retq +; +; NF-LABEL: mul64rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} imulq %rsi, %rdi, %rax +; NF-NEXT: retq entry: %mul = mul i64 %a, %b ret i64 %mul @@ -37,6 +54,11 @@ define i16 @smul16rr(i16 noundef %a, i16 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: imulw %si, %di, %ax ; CHECK-NEXT: retq +; +; NF-LABEL: smul16rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} imulw %si, %di, %ax +; NF-NEXT: retq entry: %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b) %mul = extractvalue {i16, i1} %t, 0 @@ -48,6 +70,11 @@ define i32 
@smul32rr(i32 noundef %a, i32 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: imull %esi, %edi, %eax ; CHECK-NEXT: retq +; +; NF-LABEL: smul32rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} imull %esi, %edi, %eax +; NF-NEXT: retq entry: %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b) %mul = extractvalue {i32, i1} %t, 0 @@ -59,6 +86,11 @@ define i64 @smul64rr(i64 noundef %a, i64 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: imulq %rsi, %rdi, %rax ; CHECK-NEXT: retq +; +; NF-LABEL: smul64rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} imulq %rsi, %rdi, %rax +; NF-NEXT: retq entry: %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %a, i64 %b) %mul = extractvalue {i64, i1} %t, 0 @@ -70,6 +102,11 @@ define i16 @mul16rm(i16 noundef %a, ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: imulw (%rsi), %di, %ax ; CHECK-NEXT: retq +; +; NF-LABEL: mul16rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} imulw (%rsi), %di, %ax +; NF-NEXT: retq entry: %b = load i16, ptr %ptr %mul = mul i16 %a, %b @@ -81,6 +118,11 @@ define i32 @mul32rm(i32 noundef %a, ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: imull (%rsi), %edi, %eax ; CHECK-NEXT: retq +; +; NF-LABEL: mul32rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} imull (%rsi), %edi, %eax +; NF-NEXT: retq entry: %b = load i32, ptr %ptr %mul = mul i32 %a, %b @@ -92,6 +134,11 @@ define i64 @mul64rm(i64 noundef %a, ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: imulq (%rsi), %rdi, %rax ; CHECK-NEXT: retq +; +; NF-LABEL: mul64rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} imulq (%rsi), %rdi, %rax +; NF-NEXT: retq entry: %b = load i64, ptr %ptr %mul = mul i64 %a, %b @@ -103,6 +150,11 @@ define i16 @smul16rm(i16 noundef %a, ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: imulw (%rsi), %di, %ax ; CHECK-NEXT: retq +; +; NF-LABEL: smul16rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} imulw (%rsi), %di, %ax +; NF-NEXT: retq entry: %b = load i16, ptr %ptr %t = call {i16, i1} 
@llvm.smul.with.overflow.i16(i16 %a, i16 %b) @@ -115,6 +167,11 @@ define i32 @smul32rm(i32 noundef %a, ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: imull (%rsi), %edi, %eax ; CHECK-NEXT: retq +; +; NF-LABEL: smul32rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} imull (%rsi), %edi, %eax +; NF-NEXT: retq entry: %b = load i32, ptr %ptr %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b) @@ -127,6 +184,11 @@ define i64 @smul64rm(i64 noundef %a, ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: imulq (%rsi), %rdi, %rax ; CHECK-NEXT: retq +; +; NF-LABEL: smul64rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} imulq (%rsi), %rdi, %rax +; NF-NEXT: retq entry: %b = load i64, ptr %ptr %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %a, i64 %b) diff --git a/llvm/test/CodeGen/X86/apx/inc.ll b/llvm/test/CodeGen/X86/apx/inc.ll index a9c6d740cf2ce..8d31badb99779 100644 --- a/llvm/test/CodeGen/X86/apx/inc.ll +++ b/llvm/test/CodeGen/X86/apx/inc.ll @@ -1,11 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd,nf -verify-machineinstrs | FileCheck --check-prefix=NF %s define i8 @inc8r(i8 noundef %a) { ; CHECK-LABEL: inc8r: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: incb %dil, %al ; CHECK-NEXT: retq +; +; NF-LABEL: inc8r: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} incb %dil, %al +; NF-NEXT: retq entry: %inc = add i8 %a, 1 ret i8 %inc @@ -17,6 +23,12 @@ define i16 @inc16r(i16 noundef %a) { ; CHECK-NEXT: incl %edi, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq +; +; NF-LABEL: inc16r: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} incl %edi, %eax +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq entry: %inc = add i16 %a, 1 ret i16 %inc @@ -27,6 +39,11 @@ define i32 @inc32r(i32 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: 
incl %edi, %eax ; CHECK-NEXT: retq +; +; NF-LABEL: inc32r: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} incl %edi, %eax +; NF-NEXT: retq entry: %inc = add i32 %a, 1 ret i32 %inc @@ -37,6 +54,11 @@ define i64 @inc64r(i64 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: incq %rdi, %rax ; CHECK-NEXT: retq +; +; NF-LABEL: inc64r: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} incq %rdi, %rax +; NF-NEXT: retq entry: %inc = add i64 %a, 1 ret i64 %inc @@ -47,6 +69,11 @@ define i8 @inc8m(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: incb (%rdi), %al ; CHECK-NEXT: retq +; +; NF-LABEL: inc8m: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} incb (%rdi), %al +; NF-NEXT: retq entry: %a = load i8, ptr %ptr %inc = add i8 %a, 1 @@ -60,6 +87,13 @@ define i16 @inc16m(ptr %ptr) { ; CHECK-NEXT: incl %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq +; +; NF-LABEL: inc16m: +; NF: # %bb.0: # %entry +; NF-NEXT: movzwl (%rdi), %eax +; NF-NEXT: incl %eax +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq entry: %a = load i16, ptr %ptr %inc = add i16 %a, 1 @@ -71,6 +105,11 @@ define i32 @inc32m(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: incl (%rdi), %eax ; CHECK-NEXT: retq +; +; NF-LABEL: inc32m: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} incl (%rdi), %eax +; NF-NEXT: retq entry: %a = load i32, ptr %ptr %inc = add i32 %a, 1 @@ -82,6 +121,11 @@ define i64 @inc64m(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: incq (%rdi), %rax ; CHECK-NEXT: retq +; +; NF-LABEL: inc64m: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} incq (%rdi), %rax +; NF-NEXT: retq entry: %a = load i64, ptr %ptr %inc = add i64 %a, 1 @@ -97,6 +141,15 @@ define i8 @uinc8r(i8 noundef %a) { ; CHECK-NEXT: cmovel %ecx, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq +; +; NF-LABEL: uinc8r: +; NF: # %bb.0: # %entry +; NF-NEXT: incb %dil, %al +; NF-NEXT: movzbl %al, %eax +; NF-NEXT: movl $255, %ecx +; NF-NEXT: cmovel %ecx, %eax +; NF-NEXT: # kill: def 
$al killed $al killed $eax +; NF-NEXT: retq entry: %inc = call i8 @llvm.uadd.sat.i8(i8 %a, i8 1) ret i8 %inc @@ -110,6 +163,14 @@ define i16 @uinc16r(i16 noundef %a) { ; CHECK-NEXT: cmovel %ecx, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq +; +; NF-LABEL: uinc16r: +; NF: # %bb.0: # %entry +; NF-NEXT: incw %di, %ax +; NF-NEXT: movl $65535, %ecx # imm = 0xFFFF +; NF-NEXT: cmovel %ecx, %eax +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq entry: %inc = call i16 @llvm.uadd.sat.i16(i16 %a, i16 1) ret i16 %inc @@ -122,6 +183,13 @@ define i32 @uinc32r(i32 noundef %a) { ; CHECK-NEXT: movl $-1, %ecx ; CHECK-NEXT: cmovel %ecx, %eax ; CHECK-NEXT: retq +; +; NF-LABEL: uinc32r: +; NF: # %bb.0: # %entry +; NF-NEXT: incl %edi, %eax +; NF-NEXT: movl $-1, %ecx +; NF-NEXT: cmovel %ecx, %eax +; NF-NEXT: retq entry: %inc = call i32 @llvm.uadd.sat.i32(i32 %a, i32 1) ret i32 %inc @@ -134,6 +202,13 @@ define i64 @uinc64r(i64 noundef %a) { ; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: retq +; +; NF-LABEL: uinc64r: +; NF: # %bb.0: # %entry +; NF-NEXT: incq %rdi, %rax +; NF-NEXT: movq $-1, %rcx +; NF-NEXT: cmoveq %rcx, %rax +; NF-NEXT: retq entry: %inc = call i64 @llvm.uadd.sat.i64(i64 %a, i64 1) ret i64 %inc @@ -149,6 +224,11 @@ define void @inc8m_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: incb (%rdi) ; CHECK-NEXT: retq +; +; NF-LABEL: inc8m_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: incb (%rdi) +; NF-NEXT: retq entry: %a = load i8, ptr %ptr %inc = add i8 %a, 1 @@ -161,6 +241,11 @@ define void @inc16m_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: incw (%rdi) ; CHECK-NEXT: retq +; +; NF-LABEL: inc16m_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: incw (%rdi) +; NF-NEXT: retq entry: %a = load i16, ptr %ptr %inc = add i16 %a, 1 @@ -173,6 +258,11 @@ define void @inc32m_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: incl (%rdi) ; CHECK-NEXT: retq +; +; NF-LABEL: 
inc32m_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: incl (%rdi) +; NF-NEXT: retq entry: %a = load i32, ptr %ptr %inc = add i32 %a, 1 @@ -185,6 +275,11 @@ define void @inc64m_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: incq (%rdi) ; CHECK-NEXT: retq +; +; NF-LABEL: inc64m_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: incq (%rdi) +; NF-NEXT: retq entry: %a = load i64, ptr %ptr %inc = add i64 %a, 1 diff --git a/llvm/test/CodeGen/X86/apx/neg.ll b/llvm/test/CodeGen/X86/apx/neg.ll index c1c53fbdaebd8..5e033e33cb8b2 100644 --- a/llvm/test/CodeGen/X86/apx/neg.ll +++ b/llvm/test/CodeGen/X86/apx/neg.ll @@ -1,11 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd,nf -verify-machineinstrs | FileCheck --check-prefix=NF %s define i8 @neg8r(i8 noundef %a) { ; CHECK-LABEL: neg8r: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: negb %dil, %al ; CHECK-NEXT: retq +; +; NF-LABEL: neg8r: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} negb %dil, %al +; NF-NEXT: retq entry: %neg = sub i8 0, %a ret i8 %neg @@ -17,6 +23,12 @@ define i16 @neg16r(i16 noundef %a) { ; CHECK-NEXT: negl %edi, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq +; +; NF-LABEL: neg16r: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} negl %edi, %eax +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq entry: %neg = sub i16 0, %a ret i16 %neg @@ -27,6 +39,11 @@ define i32 @neg32r(i32 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: negl %edi, %eax ; CHECK-NEXT: retq +; +; NF-LABEL: neg32r: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} negl %edi, %eax +; NF-NEXT: retq entry: %neg = sub i32 0, %a ret i32 %neg @@ -37,6 +54,11 @@ define i64 @neg64r(i64 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: negq %rdi, %rax ; CHECK-NEXT: retq +; +; NF-LABEL: neg64r: +; NF: # %bb.0: # 
%entry +; NF-NEXT: {nf} negq %rdi, %rax +; NF-NEXT: retq entry: %neg = sub i64 0, %a ret i64 %neg @@ -47,6 +69,11 @@ define i8 @neg8m(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: negb (%rdi), %al ; CHECK-NEXT: retq +; +; NF-LABEL: neg8m: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} negb (%rdi), %al +; NF-NEXT: retq entry: %a = load i8, ptr %ptr %neg = sub i8 0, %a @@ -58,6 +85,11 @@ define i16 @neg16m(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: negw (%rdi), %ax ; CHECK-NEXT: retq +; +; NF-LABEL: neg16m: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} negw (%rdi), %ax +; NF-NEXT: retq entry: %a = load i16, ptr %ptr %neg = sub i16 0, %a @@ -69,6 +101,11 @@ define i32 @neg32m(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: negl (%rdi), %eax ; CHECK-NEXT: retq +; +; NF-LABEL: neg32m: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} negl (%rdi), %eax +; NF-NEXT: retq entry: %a = load i32, ptr %ptr %neg = sub i32 0, %a @@ -80,6 +117,11 @@ define i64 @neg64m(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: negq (%rdi), %rax ; CHECK-NEXT: retq +; +; NF-LABEL: neg64m: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} negq (%rdi), %rax +; NF-NEXT: retq entry: %a = load i64, ptr %ptr %neg = sub i64 0, %a @@ -91,6 +133,11 @@ define i8 @uneg8r(i8 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: negb %dil, %al ; CHECK-NEXT: retq +; +; NF-LABEL: uneg8r: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} negb %dil, %al +; NF-NEXT: retq entry: %t = call {i8, i1} @llvm.usub.with.overflow.i8(i8 0, i8 %a) %neg = extractvalue {i8, i1} %t, 0 @@ -103,6 +150,12 @@ define i16 @uneg16r(i16 noundef %a) { ; CHECK-NEXT: negl %edi, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq +; +; NF-LABEL: uneg16r: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} negl %edi, %eax +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq entry: %t = call {i16, i1} @llvm.usub.with.overflow.i16(i16 0, i16 %a) %neg = extractvalue {i16, i1} %t, 0 @@ -114,6 +167,11 @@ define i32 
@uneg32r(i32 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: negl %edi, %eax ; CHECK-NEXT: retq +; +; NF-LABEL: uneg32r: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} negl %edi, %eax +; NF-NEXT: retq entry: %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 0, i32 %a) %neg = extractvalue {i32, i1} %t, 0 @@ -125,6 +183,11 @@ define i64 @uneg64r(i64 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: negq %rdi, %rax ; CHECK-NEXT: retq +; +; NF-LABEL: uneg64r: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} negq %rdi, %rax +; NF-NEXT: retq entry: %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 0, i64 %a) %neg = extractvalue {i64, i1} %t, 0 @@ -136,6 +199,11 @@ define i8 @uneg8m(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: negb (%rdi), %al ; CHECK-NEXT: retq +; +; NF-LABEL: uneg8m: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} negb (%rdi), %al +; NF-NEXT: retq entry: %a = load i8, ptr %ptr %t = call {i8, i1} @llvm.usub.with.overflow.i8(i8 0, i8 %a) @@ -148,6 +216,11 @@ define i16 @uneg16m(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: negw (%rdi), %ax ; CHECK-NEXT: retq +; +; NF-LABEL: uneg16m: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} negw (%rdi), %ax +; NF-NEXT: retq entry: %a = load i16, ptr %ptr %t = call {i16, i1} @llvm.usub.with.overflow.i16(i16 0, i16 %a) @@ -160,6 +233,11 @@ define i32 @uneg32m(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: negl (%rdi), %eax ; CHECK-NEXT: retq +; +; NF-LABEL: uneg32m: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} negl (%rdi), %eax +; NF-NEXT: retq entry: %a = load i32, ptr %ptr %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 0, i32 %a) @@ -172,6 +250,11 @@ define i64 @uneg64m(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: negq (%rdi), %rax ; CHECK-NEXT: retq +; +; NF-LABEL: uneg64m: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} negq (%rdi), %rax +; NF-NEXT: retq entry: %a = load i64, ptr %ptr %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 0, i64 %a) @@ -189,6 +272,11 @@ define void 
@neg8m_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: negb (%rdi) ; CHECK-NEXT: retq +; +; NF-LABEL: neg8m_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: negb (%rdi) +; NF-NEXT: retq entry: %a = load i8, ptr %ptr %neg = sub i8 0, %a @@ -201,6 +289,11 @@ define void @neg16m_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: negw (%rdi) ; CHECK-NEXT: retq +; +; NF-LABEL: neg16m_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: negw (%rdi) +; NF-NEXT: retq entry: %a = load i16, ptr %ptr %neg = sub i16 0, %a @@ -213,6 +306,11 @@ define void @neg32m_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: negl (%rdi) ; CHECK-NEXT: retq +; +; NF-LABEL: neg32m_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: negl (%rdi) +; NF-NEXT: retq entry: %a = load i32, ptr %ptr %neg = sub i32 0, %a @@ -225,6 +323,11 @@ define void @neg64m_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: negq (%rdi) ; CHECK-NEXT: retq +; +; NF-LABEL: neg64m_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: negq (%rdi) +; NF-NEXT: retq entry: %a = load i64, ptr %ptr %neg = sub i64 0, %a diff --git a/llvm/test/CodeGen/X86/apx/or.ll b/llvm/test/CodeGen/X86/apx/or.ll index 3d024e962400f..d404279e14f7a 100644 --- a/llvm/test/CodeGen/X86/apx/or.ll +++ b/llvm/test/CodeGen/X86/apx/or.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs --show-mc-encoding | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd,nf -verify-machineinstrs --show-mc-encoding | FileCheck --check-prefix=NF %s define i8 @or8rr(i8 noundef %a, i8 noundef %b) { ; CHECK-LABEL: or8rr: @@ -7,6 +8,12 @@ define i8 @or8rr(i8 noundef %a, i8 noundef %b) { ; CHECK-NEXT: orl %esi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x09,0xf7] ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or8rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orl %esi, 
%edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x09,0xf7] +; NF-NEXT: # kill: def $al killed $al killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %or = or i8 %a, %b ret i8 %or @@ -18,6 +25,12 @@ define i16 @or16rr(i16 noundef %a, i16 noundef %b) { ; CHECK-NEXT: orl %esi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x09,0xf7] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or16rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orl %esi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x09,0xf7] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %or = or i16 %a, %b ret i16 %or @@ -28,6 +41,11 @@ define i32 @or32rr(i32 noundef %a, i32 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orl %esi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x09,0xf7] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or32rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orl %esi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x09,0xf7] +; NF-NEXT: retq # encoding: [0xc3] entry: %or = or i32 %a, %b ret i32 %or @@ -38,6 +56,11 @@ define i64 @or64rr(i64 noundef %a, i64 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orq %rsi, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x09,0xf7] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or64rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orq %rsi, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x09,0xf7] +; NF-NEXT: retq # encoding: [0xc3] entry: %or = or i64 %a, %b ret i64 %or @@ -48,6 +71,11 @@ define i8 @or8rm(i8 noundef %a, ptr %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orb (%rsi), %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x0a,0x3e] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or8rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orb (%rsi), %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x0a,0x3e] +; NF-NEXT: 
retq # encoding: [0xc3] entry: %t = load i8, ptr %b %or = or i8 %a, %t @@ -59,6 +87,11 @@ define i16 @or16rm(i16 noundef %a, ptr %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orw (%rsi), %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x0b,0x3e] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or16rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orw (%rsi), %di, %ax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7d,0x1c,0x0b,0x3e] +; NF-NEXT: retq # encoding: [0xc3] entry: %t = load i16, ptr %b %or = or i16 %a, %t @@ -70,6 +103,11 @@ define i32 @or32rm(i32 noundef %a, ptr %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orl (%rsi), %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x0b,0x3e] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or32rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orl (%rsi), %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x0b,0x3e] +; NF-NEXT: retq # encoding: [0xc3] entry: %t = load i32, ptr %b %or = or i32 %a, %t @@ -81,6 +119,11 @@ define i64 @or64rm(i64 noundef %a, ptr %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orq (%rsi), %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x0b,0x3e] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or64rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orq (%rsi), %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x0b,0x3e] +; NF-NEXT: retq # encoding: [0xc3] entry: %t = load i64, ptr %b %or = or i64 %a, %t @@ -93,6 +136,12 @@ define i16 @or16ri8(i16 noundef %a) { ; CHECK-NEXT: orl $123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xcf,0x7b] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or16ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orl $123, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0xcf,0x7b] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %or = or i16 %a, 123 ret i16 %or @@ -103,6 +152,11 @@ define i32 @or32ri8(i32 
noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orl $123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xcf,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or32ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orl $123, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0xcf,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %or = or i32 %a, 123 ret i32 %or @@ -113,6 +167,11 @@ define i64 @or64ri8(i64 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orq $123, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xcf,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or64ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orq $123, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x83,0xcf,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %or = or i64 %a, 123 ret i64 %or @@ -123,6 +182,11 @@ define i8 @or8ri(i8 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orb $123, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0xcf,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or8ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orb $123, %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x80,0xcf,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %or = or i8 %a, 123 ret i8 %or @@ -135,6 +199,13 @@ define i16 @or16ri(i16 noundef %a) { ; CHECK-NEXT: # imm = 0x4D2 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or16ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orl $1234, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xcf,0xd2,0x04,0x00,0x00] +; NF-NEXT: # imm = 0x4D2 +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %or = or i16 %a, 1234 ret i16 %or @@ -146,6 +217,12 @@ define i32 @or32ri(i32 noundef %a) { ; CHECK-NEXT: orl $123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xcf,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq 
# encoding: [0xc3] +; +; NF-LABEL: or32ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orl $123456, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xcf,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %or = or i32 %a, 123456 ret i32 %or @@ -157,6 +234,12 @@ define i64 @or64ri(i64 noundef %a) { ; CHECK-NEXT: orq $123456, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0xcf,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or64ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orq $123456, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x81,0xcf,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %or = or i64 %a, 123456 ret i64 %or @@ -167,6 +250,11 @@ define i8 @or8mr(ptr %a, i8 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orb %sil, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0x08,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or8mr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orb %sil, (%rdi), %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x08,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i8, ptr %a %or = or i8 %t, %b @@ -178,6 +266,11 @@ define i16 @or16mr(ptr %a, i16 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orw %si, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0x09,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or16mr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orw %si, (%rdi), %ax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7d,0x1c,0x09,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %or = or i16 %t, %b @@ -189,6 +282,11 @@ define i32 @or32mr(ptr %a, i32 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orl %esi, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x09,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or32mr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orl 
%esi, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x09,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %or = or i32 %t, %b @@ -200,6 +298,11 @@ define i64 @or64mr(ptr %a, i64 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orq %rsi, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x09,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or64mr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orq %rsi, (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x09,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %or = or i64 %t, %b @@ -213,6 +316,13 @@ define i16 @or16mi8(ptr %a) { ; CHECK-NEXT: orl $123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xc8,0x7b] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or16mi8: +; NF: # %bb.0: # %entry +; NF-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] +; NF-NEXT: orl $123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xc8,0x7b] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %or = or i16 %t, 123 @@ -224,6 +334,11 @@ define i32 @or32mi8(ptr %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orl $123, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0x0f,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or32mi8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orl $123, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0x0f,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %or = or i32 %t, 123 @@ -235,6 +350,11 @@ define i64 @or64mi8(ptr %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orq $123, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0x0f,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or64mi8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orq $123, (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x83,0x0f,0x7b] 
+; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %or = or i64 %t, 123 @@ -246,6 +366,11 @@ define i8 @or8mi(ptr %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orb $123, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0x0f,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or8mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orb $123, (%rdi), %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x80,0x0f,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i8, ptr %a %or = or i8 %t, 123 @@ -260,6 +385,14 @@ define i16 @or16mi(ptr %a) { ; CHECK-NEXT: # imm = 0x4D2 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or16mi: +; NF: # %bb.0: # %entry +; NF-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] +; NF-NEXT: orl $1234, %eax # EVEX TO LEGACY Compression encoding: [0x0d,0xd2,0x04,0x00,0x00] +; NF-NEXT: # imm = 0x4D2 +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %or = or i16 %t, 1234 @@ -272,6 +405,12 @@ define i32 @or32mi(ptr %a) { ; CHECK-NEXT: orl $123456, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0x0f,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or32mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orl $123456, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0x0f,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %or = or i32 %t, 123456 @@ -284,6 +423,12 @@ define i64 @or64mi(ptr %a) { ; CHECK-NEXT: orq $123456, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0x0f,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or64mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} orq $123456, (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x81,0x0f,0x40,0xe2,0x01,0x00] +; NF-NEXT: # 
imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %or = or i64 %t, 123456 @@ -301,6 +446,15 @@ define i1 @orflag8rr(i8 %a, i8 %b) { ; CHECK-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: orflag8rr: +; NF: # %bb.0: +; NF-NEXT: notb %sil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xf6,0xd6] +; NF-NEXT: orb %al, %dil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x08,0xc7] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %xor = xor i8 %b, -1 %v0 = or i8 %a, %xor ; 0xff << 50 %v1 = icmp eq i8 %v0, 0 @@ -317,6 +471,15 @@ define i1 @orflag16rr(i16 %a, i16 %b) { ; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: orflag16rr: +; NF: # %bb.0: +; NF-NEXT: notl %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xf7,0xd6] +; NF-NEXT: orw %ax, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x09,0xc7] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %xor = xor i16 %b, -1 %v0 = or i16 %a, %xor ; 0xff << 50 %v1 = icmp eq i16 %v0, 0 @@ -332,6 +495,14 @@ define i1 @orflag32rr(i32 %a, i32 %b) { ; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: orflag32rr: +; NF: # %bb.0: +; NF-NEXT: orl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x09,0xf7] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; 
NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %v0 = or i32 %a, %b ; 0xff << 50 %v1 = icmp eq i32 %v0, 0 store i32 %v0, ptr @d64 @@ -346,6 +517,14 @@ define i1 @orflag64rr(i64 %a, i64 %b) { ; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: orflag64rr: +; NF: # %bb.0: +; NF-NEXT: orq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x09,0xf7] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %v0 = or i64 %a, %b ; 0xff << 50 %v1 = icmp eq i64 %v0, 0 store i64 %v0, ptr @d64 @@ -361,6 +540,15 @@ define i1 @orflag8rm(ptr %ptr, i8 %b) { ; CHECK-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: orflag8rm: +; NF: # %bb.0: +; NF-NEXT: notb %sil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xf6,0xd6] +; NF-NEXT: orb (%rdi), %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0x0a,0x07] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %a = load i8, ptr %ptr %xor = xor i8 %b, -1 %v0 = or i8 %a, %xor ; 0xff << 50 @@ -378,6 +566,15 @@ define i1 @orflag16rm(ptr %ptr, i16 %b) { ; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: orflag16rm: +; NF: # %bb.0: +; NF-NEXT: notl %esi, %eax # encoding: 
[0x62,0xf4,0x7c,0x18,0xf7,0xd6] +; NF-NEXT: orw (%rdi), %ax, %cx # encoding: [0x62,0xf4,0x75,0x18,0x0b,0x07] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %a = load i16, ptr %ptr %xor = xor i16 %b, -1 %v0 = or i16 %a, %xor ; 0xff << 50 @@ -394,6 +591,14 @@ define i1 @orflag32rm(ptr %ptr, i32 %b) { ; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: orflag32rm: +; NF: # %bb.0: +; NF-NEXT: orl (%rdi), %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x0b,0x37] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %a = load i32, ptr %ptr %v0 = or i32 %a, %b ; 0xff << 50 %v1 = icmp eq i32 %v0, 0 @@ -409,6 +614,14 @@ define i1 @orflag64rm(ptr %ptr, i64 %b) { ; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: orflag64rm: +; NF: # %bb.0: +; NF-NEXT: orq (%rdi), %rsi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x0b,0x37] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %a = load i64, ptr %ptr %v0 = or i64 %a, %b ; 0xff << 50 %v1 = icmp eq i64 %v0, 0 @@ -424,6 +637,14 @@ define i1 @orflag8ri(i8 %a) { ; CHECK-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # 
encoding: [0xc3] +; +; NF-LABEL: orflag8ri: +; NF: # %bb.0: +; NF-NEXT: orb $-124, %dil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xcf,0x84] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %xor = xor i8 123, -1 %v0 = or i8 %a, %xor ; 0xff << 50 %v1 = icmp eq i8 %v0, 0 @@ -440,6 +661,15 @@ define i1 @orflag16ri(i16 %a) { ; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: orflag16ri: +; NF: # %bb.0: +; NF-NEXT: orw $-1235, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x81,0xcf,0x2d,0xfb] +; NF-NEXT: # imm = 0xFB2D +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %xor = xor i16 1234, -1 %v0 = or i16 %a, %xor ; 0xff << 50 %v1 = icmp eq i16 %v0, 0 @@ -456,6 +686,15 @@ define i1 @orflag32ri(i32 %a) { ; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: orflag32ri: +; NF: # %bb.0: +; NF-NEXT: orl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xcf,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %v0 = or i32 %a, 123456 ; 0xff << 50 %v1 = icmp eq i32 %v0, 0 store i32 %v0, ptr @d64 @@ -471,6 +710,15 @@ define i1 @orflag64ri(i64 %a) { ; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] ; 
CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: orflag64ri: +; NF: # %bb.0: +; NF-NEXT: orq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xcf,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %v0 = or i64 %a, 123456 ; 0xff << 50 %v1 = icmp eq i64 %v0, 0 store i64 %v0, ptr @d64 @@ -485,6 +733,14 @@ define i1 @orflag16ri8(i16 %a) { ; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: orflag16ri8: +; NF: # %bb.0: +; NF-NEXT: orw $-124, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xcf,0x84] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %xor = xor i16 123, -1 %v0 = or i16 %a, %xor ; 0xff << 50 %v1 = icmp eq i16 %v0, 0 @@ -500,6 +756,14 @@ define i1 @orflag32ri8(i32 %a) { ; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: orflag32ri8: +; NF: # %bb.0: +; NF-NEXT: orl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xcf,0x7b] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %v0 = or i32 %a, 123 ; 0xff << 50 %v1 = icmp eq i32 %v0, 0 store i32 %v0, ptr @d64 @@ -514,6 +778,14 @@ define i1 @orflag64ri8(i64 %a) 
{ ; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: orflag64ri8: +; NF: # %bb.0: +; NF-NEXT: orq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xcf,0x7b] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %v0 = or i64 %a, 123 ; 0xff << 50 %v1 = icmp eq i64 %v0, 0 store i64 %v0, ptr @d64 @@ -525,6 +797,11 @@ define void @or8mr_legacy(ptr %a, i8 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orb %sil, (%rdi) # encoding: [0x40,0x08,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or8mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: orb %sil, (%rdi) # encoding: [0x40,0x08,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i8, ptr %a %or = or i8 %t, %b @@ -537,6 +814,11 @@ define void @or16mr_legacy(ptr %a, i16 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orw %si, (%rdi) # encoding: [0x66,0x09,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or16mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: orw %si, (%rdi) # encoding: [0x66,0x09,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %or = or i16 %t, %b @@ -549,6 +831,11 @@ define void @or32mr_legacy(ptr %a, i32 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orl %esi, (%rdi) # encoding: [0x09,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or32mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: orl %esi, (%rdi) # encoding: [0x09,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %or = or i32 %t, %b @@ -561,6 +848,11 @@ define void @or64mr_legacy(ptr %a, i64 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orq %rsi, (%rdi) # encoding: [0x48,0x09,0x37] ; CHECK-NEXT: retq # encoding: 
[0xc3] +; +; NF-LABEL: or64mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: orq %rsi, (%rdi) # encoding: [0x48,0x09,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %or = or i64 %t, %b @@ -573,6 +865,11 @@ define void @or8mi_legacy(ptr %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orb $123, (%rdi) # encoding: [0x80,0x0f,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or8mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: orb $123, (%rdi) # encoding: [0x80,0x0f,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i8, ptr %a %or = or i8 %t, 123 @@ -586,6 +883,12 @@ define void @or16mi_legacy(ptr %a) { ; CHECK-NEXT: orw $1234, (%rdi) # encoding: [0x66,0x81,0x0f,0xd2,0x04] ; CHECK-NEXT: # imm = 0x4D2 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or16mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: orw $1234, (%rdi) # encoding: [0x66,0x81,0x0f,0xd2,0x04] +; NF-NEXT: # imm = 0x4D2 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %or = or i16 %t, 1234 @@ -599,6 +902,12 @@ define void @or32mi_legacy(ptr %a) { ; CHECK-NEXT: orl $123456, (%rdi) # encoding: [0x81,0x0f,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or32mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: orl $123456, (%rdi) # encoding: [0x81,0x0f,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %or = or i32 %t, 123456 @@ -612,6 +921,12 @@ define void @or64mi_legacy(ptr %a) { ; CHECK-NEXT: orq $123456, (%rdi) # encoding: [0x48,0x81,0x0f,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: or64mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: orq $123456, (%rdi) # encoding: [0x48,0x81,0x0f,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %or = or i64 %t, 123456 diff --git a/llvm/test/CodeGen/X86/apx/shl.ll 
b/llvm/test/CodeGen/X86/apx/shl.ll index 869caf932ff92..35b6cb27254b2 100644 --- a/llvm/test/CodeGen/X86/apx/shl.ll +++ b/llvm/test/CodeGen/X86/apx/shl.ll @@ -1,11 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs --show-mc-encoding | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd,nf -verify-machineinstrs --show-mc-encoding | FileCheck --check-prefix=NF %s define i8 @shl8ri(i8 noundef %a) { ; CHECK-LABEL: shl8ri: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shlb $4, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0xe7,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl8ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} shlb $4, %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xc0,0xe7,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %shl = shl i8 %a, 4 ret i8 %shl @@ -17,6 +23,12 @@ define i16 @shl16ri(i16 noundef %a) { ; CHECK-NEXT: shll $4, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0xe7,0x04] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl16ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} shll $4, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xc1,0xe7,0x04] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %shl = shl i16 %a, 4 ret i16 %shl @@ -27,6 +39,11 @@ define i32 @shl32ri(i32 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shll $4, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0xe7,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl32ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} shll $4, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xc1,0xe7,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %shl = shl i32 %a, 4 ret i32 %shl @@ -37,6 +54,11 @@ define i64 @shl64ri(i64 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shlq $4, 
%rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xe7,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl64ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} shlq $4, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0xc1,0xe7,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %shl = shl i64 %a, 4 ret i64 %shl @@ -48,6 +70,12 @@ define i8 @shl8m1(ptr %ptr) { ; CHECK-NEXT: movzbl (%rdi), %eax # encoding: [0x0f,0xb6,0x07] ; CHECK-NEXT: addb %al, %al # EVEX TO LEGACY Compression encoding: [0x00,0xc0] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl8m1: +; NF: # %bb.0: # %entry +; NF-NEXT: movzbl (%rdi), %eax # encoding: [0x0f,0xb6,0x07] +; NF-NEXT: addb %al, %al # EVEX TO LEGACY Compression encoding: [0x00,0xc0] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i8, ptr %ptr %shl = shl i8 %a, 1 @@ -61,6 +89,13 @@ define i16 @shl16m1(ptr %ptr) { ; CHECK-NEXT: addl %eax, %eax # EVEX TO LEGACY Compression encoding: [0x01,0xc0] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl16m1: +; NF: # %bb.0: # %entry +; NF-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] +; NF-NEXT: addl %eax, %eax # EVEX TO LEGACY Compression encoding: [0x01,0xc0] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i16, ptr %ptr %shl = shl i16 %a, 1 @@ -73,6 +108,12 @@ define i32 @shl32m1(ptr %ptr) { ; CHECK-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07] ; CHECK-NEXT: addl %eax, %eax # EVEX TO LEGACY Compression encoding: [0x01,0xc0] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl32m1: +; NF: # %bb.0: # %entry +; NF-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07] +; NF-NEXT: addl %eax, %eax # EVEX TO LEGACY Compression encoding: [0x01,0xc0] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i32, ptr %ptr %shl = shl i32 %a, 1 @@ -85,6 +126,12 @@ define i64 @shl64m1(ptr %ptr) { ; CHECK-NEXT: movq (%rdi), %rax # encoding: 
[0x48,0x8b,0x07] ; CHECK-NEXT: addq %rax, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x01,0xc0] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl64m1: +; NF: # %bb.0: # %entry +; NF-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07] +; NF-NEXT: addq %rax, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x01,0xc0] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i64, ptr %ptr %shl = shl i64 %a, 1 @@ -98,6 +145,13 @@ define i8 @shl8mcl(ptr %ptr, i8 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shlb %cl, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0x27] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl8mcl: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: {nf} shlb %cl, (%rdi), %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd2,0x27] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i8, ptr %ptr %shl = shl i8 %a, %cl @@ -111,6 +165,13 @@ define i8 @shl8mcl_mask(ptr %ptr, i8 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shlb %cl, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0x27] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl8mcl_mask: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: {nf} shlb %cl, (%rdi), %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd2,0x27] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i8, ptr %ptr %shamt = and i8 %cl, 31 @@ -127,6 +188,15 @@ define i16 @shl16mcl(ptr %ptr, i16 %cl) { ; CHECK-NEXT: shll %cl, %eax # EVEX TO LEGACY Compression encoding: [0xd3,0xe0] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl16mcl: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] +; 
NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: shll %cl, %eax # EVEX TO LEGACY Compression encoding: [0xd3,0xe0] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i16, ptr %ptr %shl = shl i16 %a, %cl @@ -142,6 +212,15 @@ define i16 @shl16mcl_mask(ptr %ptr, i16 %cl) { ; CHECK-NEXT: shll %cl, %eax # EVEX TO LEGACY Compression encoding: [0xd3,0xe0] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl16mcl_mask: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: shll %cl, %eax # EVEX TO LEGACY Compression encoding: [0xd3,0xe0] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i16, ptr %ptr %shamt = and i16 %cl, 31 @@ -156,6 +235,13 @@ define i32 @shl32mcl(ptr %ptr, i32 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shll %cl, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0x27] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl32mcl: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: {nf} shll %cl, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd3,0x27] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i32, ptr %ptr %shl = shl i32 %a, %cl @@ -169,6 +255,13 @@ define i32 @shl32mcl_mask(ptr %ptr, i32 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shll %cl, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0x27] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl32mcl_mask: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: {nf} shll %cl, (%rdi), %eax # EVEX TO EVEX 
Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd3,0x27] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i32, ptr %ptr %shamt = and i32 %cl, 31 @@ -183,6 +276,13 @@ define i64 @shl64mcl(ptr %ptr, i64 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-NEXT: shlq %cl, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0x27] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl64mcl: +; NF: # %bb.0: # %entry +; NF-NEXT: movq %rsi, %rcx # encoding: [0x48,0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $rcx +; NF-NEXT: {nf} shlq %cl, (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0xd3,0x27] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i64, ptr %ptr %shl = shl i64 %a, %cl @@ -196,6 +296,13 @@ define i64 @shl64mcl_mask(ptr %ptr, i64 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-NEXT: shlq %cl, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0x27] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl64mcl_mask: +; NF: # %bb.0: # %entry +; NF-NEXT: movq %rsi, %rcx # encoding: [0x48,0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $rcx +; NF-NEXT: {nf} shlq %cl, (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0xd3,0x27] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i64, ptr %ptr %shamt = and i64 %cl, 63 @@ -208,6 +315,11 @@ define i8 @shl8mi(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shlb $4, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0x27,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl8mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} shlb $4, (%rdi), %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xc0,0x27,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i8, ptr %ptr %shl = shl i8 %a, 4 @@ -221,6 +333,13 @@ define i16 @shl16mi(ptr %ptr) { ; CHECK-NEXT: shll $4, %eax # EVEX TO LEGACY Compression encoding: [0xc1,0xe0,0x04] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq 
# encoding: [0xc3] +; +; NF-LABEL: shl16mi: +; NF: # %bb.0: # %entry +; NF-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] +; NF-NEXT: shll $4, %eax # EVEX TO LEGACY Compression encoding: [0xc1,0xe0,0x04] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i16, ptr %ptr %shl = shl i16 %a, 4 @@ -232,6 +351,11 @@ define i32 @shl32mi(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shll $4, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0x27,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl32mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} shll $4, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xc1,0x27,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i32, ptr %ptr %shl = shl i32 %a, 4 @@ -243,6 +367,11 @@ define i64 @shl64mi(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shlq $4, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0x27,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl64mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} shlq $4, (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0xc1,0x27,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i64, ptr %ptr %shl = shl i64 %a, 4 @@ -254,6 +383,11 @@ define i8 @shl8r1(i8 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addb %dil, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x00,0xff] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl8r1: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addb %dil, %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x00,0xff] +; NF-NEXT: retq # encoding: [0xc3] entry: %shl = shl i8 %a, 1 ret i8 %shl @@ -265,6 +399,12 @@ define i16 @shl16r1(i16 noundef %a) { ; CHECK-NEXT: addl %edi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xff] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl16r1: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addl %edi, 
%edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x01,0xff] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %shl = shl i16 %a, 1 ret i16 %shl @@ -275,6 +415,11 @@ define i32 @shl32r1(i32 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addl %edi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xff] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl32r1: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addl %edi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x01,0xff] +; NF-NEXT: retq # encoding: [0xc3] entry: %shl = shl i32 %a, 1 ret i32 %shl @@ -285,6 +430,11 @@ define i64 @shl64r1(i64 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addq %rdi, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x01,0xff] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl64r1: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addq %rdi, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x01,0xff] +; NF-NEXT: retq # encoding: [0xc3] entry: %shl = shl i64 %a, 1 ret i64 %shl @@ -297,6 +447,13 @@ define i8 @shl8rcl(i8 noundef %a, i8 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shlb %cl, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0xe7] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl8rcl: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: {nf} shlb %cl, %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd2,0xe7] +; NF-NEXT: retq # encoding: [0xc3] entry: %shl = shl i8 %a, %cl ret i8 %shl @@ -309,6 +466,13 @@ define i8 @shl8rcl_mask(i8 noundef %a, i8 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shlb %cl, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0xe7] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl8rcl_mask: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: 
[0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: {nf} shlb %cl, %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd2,0xe7] +; NF-NEXT: retq # encoding: [0xc3] entry: %shamt = and i8 %cl, 31 %shl = shl i8 %a, %shamt @@ -323,6 +487,14 @@ define i16 @shl16rcl(i16 noundef %a, i16 %cl) { ; CHECK-NEXT: shll %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xe7] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl16rcl: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: {nf} shll %cl, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd3,0xe7] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %shl = shl i16 %a, %cl ret i16 %shl @@ -336,6 +508,14 @@ define i16 @shl16rcl_mask(i16 noundef %a, i16 %cl) { ; CHECK-NEXT: shll %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xe7] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl16rcl_mask: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: {nf} shll %cl, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd3,0xe7] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %shamt = and i16 %cl, 31 %shl = shl i16 %a, %shamt @@ -349,6 +529,13 @@ define i32 @shl32rcl(i32 noundef %a, i32 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shll %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xe7] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl32rcl: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: {nf} shll %cl, %edi, %eax # EVEX TO EVEX 
Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd3,0xe7] +; NF-NEXT: retq # encoding: [0xc3] entry: %shl = shl i32 %a, %cl ret i32 %shl @@ -361,6 +548,13 @@ define i32 @shl32rcl_mask(i32 noundef %a, i32 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shll %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xe7] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl32rcl_mask: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: {nf} shll %cl, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd3,0xe7] +; NF-NEXT: retq # encoding: [0xc3] entry: %shamt = and i32 %cl, 31 %shl = shl i32 %a, %shamt @@ -374,6 +568,13 @@ define i64 @shl64rcl(i64 noundef %a, i64 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-NEXT: shlq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xe7] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl64rcl: +; NF: # %bb.0: # %entry +; NF-NEXT: movq %rsi, %rcx # encoding: [0x48,0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $rcx +; NF-NEXT: {nf} shlq %cl, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0xd3,0xe7] +; NF-NEXT: retq # encoding: [0xc3] entry: %shl = shl i64 %a, %cl ret i64 %shl @@ -386,6 +587,13 @@ define i64 @shl64rcl_mask(i64 noundef %a, i64 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-NEXT: shlq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xe7] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl64rcl_mask: +; NF: # %bb.0: # %entry +; NF-NEXT: movq %rsi, %rcx # encoding: [0x48,0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $rcx +; NF-NEXT: {nf} shlq %cl, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0xd3,0xe7] +; NF-NEXT: retq # encoding: [0xc3] entry: %shamt = and i64 %cl, 63 %shl = shl i64 %a, %shamt @@ -397,6 +605,11 @@ define void @shl8m1_legacy(ptr %ptr) { ; CHECK: 
# %bb.0: # %entry ; CHECK-NEXT: shlb (%rdi) # encoding: [0xd0,0x27] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl8m1_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: shlb (%rdi) # encoding: [0xd0,0x27] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i8, ptr %ptr %shl = shl i8 %a, 1 @@ -409,6 +622,11 @@ define void @shl16m1_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shlw (%rdi) # encoding: [0x66,0xd1,0x27] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl16m1_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: shlw (%rdi) # encoding: [0x66,0xd1,0x27] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i16, ptr %ptr %shl = shl i16 %a, 1 @@ -421,6 +639,11 @@ define void @shl32m1_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shll (%rdi) # encoding: [0xd1,0x27] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl32m1_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: shll (%rdi) # encoding: [0xd1,0x27] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i32, ptr %ptr %shl = shl i32 %a, 1 @@ -433,6 +656,11 @@ define void @shl64m1_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shlq (%rdi) # encoding: [0x48,0xd1,0x27] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl64m1_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: shlq (%rdi) # encoding: [0x48,0xd1,0x27] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i64, ptr %ptr %shl = shl i64 %a, 1 @@ -445,6 +673,11 @@ define void @shl8mi_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shlb $4, (%rdi) # encoding: [0xc0,0x27,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl8mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: shlb $4, (%rdi) # encoding: [0xc0,0x27,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i8, ptr %ptr %shl = shl i8 %a, 4 @@ -457,6 +690,11 @@ define void @shl16mi_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shlw $4, (%rdi) # encoding: [0x66,0xc1,0x27,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; 
+; NF-LABEL: shl16mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: shlw $4, (%rdi) # encoding: [0x66,0xc1,0x27,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i16, ptr %ptr %shl = shl i16 %a, 4 @@ -469,6 +707,11 @@ define void @shl32mi_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shll $4, (%rdi) # encoding: [0xc1,0x27,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl32mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: shll $4, (%rdi) # encoding: [0xc1,0x27,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i32, ptr %ptr %shl = shl i32 %a, 4 @@ -481,6 +724,11 @@ define void @shl64mi_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shlq $4, (%rdi) # encoding: [0x48,0xc1,0x27,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl64mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: shlq $4, (%rdi) # encoding: [0x48,0xc1,0x27,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i64, ptr %ptr %shl = shl i64 %a, 4 @@ -495,6 +743,13 @@ define void @shl8mcl_legacy(ptr %ptr, i8 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shlb %cl, (%rdi) # encoding: [0xd2,0x27] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl8mcl_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: shlb %cl, (%rdi) # encoding: [0xd2,0x27] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i8, ptr %ptr %shl = shl i8 %a, %cl @@ -509,6 +764,13 @@ define void @shl16mcl_legacy(ptr %ptr, i16 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shlw %cl, (%rdi) # encoding: [0x66,0xd3,0x27] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl16mcl_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: shlw %cl, (%rdi) # encoding: [0x66,0xd3,0x27] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load 
i16, ptr %ptr %shl = shl i16 %a, %cl @@ -523,6 +785,13 @@ define void @shl32mcl_legacy(ptr %ptr, i32 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shll %cl, (%rdi) # encoding: [0xd3,0x27] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl32mcl_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: shll %cl, (%rdi) # encoding: [0xd3,0x27] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i32, ptr %ptr %shl = shl i32 %a, %cl @@ -537,6 +806,13 @@ define void @shl64mcl_legacy(ptr %ptr, i64 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-NEXT: shlq %cl, (%rdi) # encoding: [0x48,0xd3,0x27] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shl64mcl_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: movq %rsi, %rcx # encoding: [0x48,0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $rcx +; NF-NEXT: shlq %cl, (%rdi) # encoding: [0x48,0xd3,0x27] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i64, ptr %ptr %shl = shl i64 %a, %cl diff --git a/llvm/test/CodeGen/X86/apx/shr.ll b/llvm/test/CodeGen/X86/apx/shr.ll index a7e02d8586f49..b5b91b02fedff 100644 --- a/llvm/test/CodeGen/X86/apx/shr.ll +++ b/llvm/test/CodeGen/X86/apx/shr.ll @@ -1,11 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs --show-mc-encoding | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd,nf -verify-machineinstrs --show-mc-encoding | FileCheck --check-prefix=NF %s define i8 @shr8m1(ptr %ptr) { ; CHECK-LABEL: shr8m1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrb (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xd0,0x2f] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr8m1: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} shrb (%rdi), %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd0,0x2f] +; NF-NEXT: retq # 
encoding: [0xc3] entry: %a = load i8, ptr %ptr %shr = lshr i8 %a, 1 @@ -19,6 +25,13 @@ define i16 @shr16m1(ptr %ptr) { ; CHECK-NEXT: shrl %eax # EVEX TO LEGACY Compression encoding: [0xd1,0xe8] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr16m1: +; NF: # %bb.0: # %entry +; NF-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] +; NF-NEXT: shrl %eax # EVEX TO LEGACY Compression encoding: [0xd1,0xe8] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i16, ptr %ptr %shr = lshr i16 %a, 1 @@ -30,6 +43,11 @@ define i32 @shr32m1(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrl (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd1,0x2f] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr32m1: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} shrl (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd1,0x2f] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i32, ptr %ptr %shr = lshr i32 %a, 1 @@ -41,6 +59,11 @@ define i64 @shr64m1(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrq (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd1,0x2f] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr64m1: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} shrq (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0xd1,0x2f] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i64, ptr %ptr %shr = lshr i64 %a, 1 @@ -54,6 +77,13 @@ define i8 @shr8mcl(ptr %ptr, i8 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shrb %cl, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0x2f] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr8mcl: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: {nf} shrb %cl, (%rdi), %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd2,0x2f] +; NF-NEXT: retq # encoding: 
[0xc3] entry: %a = load i8, ptr %ptr %shr = lshr i8 %a, %cl @@ -67,6 +97,13 @@ define i8 @shr8mcl_mask(ptr %ptr, i8 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shrb %cl, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0x2f] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr8mcl_mask: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: {nf} shrb %cl, (%rdi), %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd2,0x2f] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i8, ptr %ptr %shamt = and i8 %cl, 31 @@ -83,6 +120,15 @@ define i16 @shr16mcl(ptr %ptr, i16 %cl) { ; CHECK-NEXT: shrl %cl, %eax # EVEX TO LEGACY Compression encoding: [0xd3,0xe8] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr16mcl: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: shrl %cl, %eax # EVEX TO LEGACY Compression encoding: [0xd3,0xe8] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i16, ptr %ptr %shr = lshr i16 %a, %cl @@ -98,6 +144,15 @@ define i16 @shr16mcl_mask(ptr %ptr, i16 %cl) { ; CHECK-NEXT: shrl %cl, %eax # EVEX TO LEGACY Compression encoding: [0xd3,0xe8] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr16mcl_mask: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: shrl %cl, %eax # EVEX TO LEGACY Compression encoding: [0xd3,0xe8] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i16, ptr %ptr %shamt = and i16 %cl, 31 @@ 
-112,6 +167,13 @@ define i32 @shr32mcl(ptr %ptr, i32 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shrl %cl, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0x2f] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr32mcl: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: {nf} shrl %cl, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd3,0x2f] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i32, ptr %ptr %shr = lshr i32 %a, %cl @@ -125,6 +187,13 @@ define i32 @shr32mcl_mask(ptr %ptr, i32 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shrl %cl, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0x2f] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr32mcl_mask: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: {nf} shrl %cl, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd3,0x2f] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i32, ptr %ptr %shamt = and i32 %cl, 31 @@ -139,6 +208,13 @@ define i64 @shr64mcl(ptr %ptr, i64 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-NEXT: shrq %cl, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0x2f] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr64mcl: +; NF: # %bb.0: # %entry +; NF-NEXT: movq %rsi, %rcx # encoding: [0x48,0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $rcx +; NF-NEXT: {nf} shrq %cl, (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0xd3,0x2f] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i64, ptr %ptr %shr = lshr i64 %a, %cl @@ -152,6 +228,13 @@ define i64 @shr64mcl_mask(ptr %ptr, i64 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-NEXT: shrq %cl, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0x2f] ; CHECK-NEXT: 
retq # encoding: [0xc3] +; +; NF-LABEL: shr64mcl_mask: +; NF: # %bb.0: # %entry +; NF-NEXT: movq %rsi, %rcx # encoding: [0x48,0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $rcx +; NF-NEXT: {nf} shrq %cl, (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0xd3,0x2f] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i64, ptr %ptr %shamt = and i64 %cl, 63 @@ -164,6 +247,11 @@ define i8 @shr8mi(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrb $4, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0x2f,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr8mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} shrb $4, (%rdi), %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xc0,0x2f,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i8, ptr %ptr %shr = lshr i8 %a, 4 @@ -177,6 +265,13 @@ define i16 @shr16mi(ptr %ptr) { ; CHECK-NEXT: shrl $4, %eax # EVEX TO LEGACY Compression encoding: [0xc1,0xe8,0x04] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr16mi: +; NF: # %bb.0: # %entry +; NF-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] +; NF-NEXT: shrl $4, %eax # EVEX TO LEGACY Compression encoding: [0xc1,0xe8,0x04] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i16, ptr %ptr %shr = lshr i16 %a, 4 @@ -188,6 +283,11 @@ define i32 @shr32mi(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrl $4, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0x2f,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr32mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} shrl $4, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xc1,0x2f,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i32, ptr %ptr %shr = lshr i32 %a, 4 @@ -199,6 +299,11 @@ define i64 @shr64mi(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrq $4, (%rdi), %rax # encoding: 
[0x62,0xf4,0xfc,0x18,0xc1,0x2f,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr64mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} shrq $4, (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0xc1,0x2f,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i64, ptr %ptr %shr = lshr i64 %a, 4 @@ -210,6 +315,11 @@ define i8 @shr8r1(i8 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrb %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xd0,0xef] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr8r1: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} shrb %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd0,0xef] +; NF-NEXT: retq # encoding: [0xc3] entry: %shr = lshr i8 %a, 1 ret i8 %shr @@ -222,6 +332,13 @@ define i16 @shr16r1(i16 noundef %a) { ; CHECK-NEXT: shrl %eax # EVEX TO LEGACY Compression encoding: [0xd1,0xe8] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr16r1: +; NF: # %bb.0: # %entry +; NF-NEXT: movzwl %di, %eax # encoding: [0x0f,0xb7,0xc7] +; NF-NEXT: shrl %eax # EVEX TO LEGACY Compression encoding: [0xd1,0xe8] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %shr = lshr i16 %a, 1 ret i16 %shr @@ -232,6 +349,11 @@ define i32 @shr32r1(i32 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrl %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd1,0xef] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr32r1: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} shrl %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd1,0xef] +; NF-NEXT: retq # encoding: [0xc3] entry: %shr = lshr i32 %a, 1 ret i32 %shr @@ -242,6 +364,11 @@ define i64 @shr64r1(i64 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrq %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd1,0xef] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr64r1: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} shrq %rdi, %rax # EVEX 
TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0xd1,0xef] +; NF-NEXT: retq # encoding: [0xc3] entry: %shr = lshr i64 %a, 1 ret i64 %shr @@ -254,6 +381,13 @@ define i8 @shr8rcl(i8 noundef %a, i8 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shrb %cl, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0xef] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr8rcl: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: {nf} shrb %cl, %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd2,0xef] +; NF-NEXT: retq # encoding: [0xc3] entry: %shr = lshr i8 %a, %cl ret i8 %shr @@ -266,6 +400,13 @@ define i8 @shr8rcl_mask(i8 noundef %a, i8 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shrb %cl, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xd2,0xef] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr8rcl_mask: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: {nf} shrb %cl, %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd2,0xef] +; NF-NEXT: retq # encoding: [0xc3] entry: %shamt = and i8 %cl, 31 %shr = lshr i8 %a, %shamt @@ -281,6 +422,15 @@ define i16 @shr16rcl(i16 noundef %a, i16 %cl) { ; CHECK-NEXT: shrl %cl, %eax # EVEX TO LEGACY Compression encoding: [0xd3,0xe8] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr16rcl: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: movzwl %di, %eax # encoding: [0x0f,0xb7,0xc7] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: shrl %cl, %eax # EVEX TO LEGACY Compression encoding: [0xd3,0xe8] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %shr = lshr i16 %a, %cl ret i16 %shr @@ -295,6 +445,15 @@ define i16 
@shr16rcl_mask(i16 noundef %a, i16 %cl) { ; CHECK-NEXT: shrl %cl, %eax # EVEX TO LEGACY Compression encoding: [0xd3,0xe8] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr16rcl_mask: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: movzwl %di, %eax # encoding: [0x0f,0xb7,0xc7] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: shrl %cl, %eax # EVEX TO LEGACY Compression encoding: [0xd3,0xe8] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %shamt = and i16 %cl, 31 %shr = lshr i16 %a, %shamt @@ -308,6 +467,13 @@ define i32 @shr32rcl(i32 noundef %a, i32 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shrl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xef] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr32rcl: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: {nf} shrl %cl, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd3,0xef] +; NF-NEXT: retq # encoding: [0xc3] entry: %shr = lshr i32 %a, %cl ret i32 %shr @@ -320,6 +486,13 @@ define i32 @shr32rcl_mask(i32 noundef %a, i32 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shrl %cl, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xd3,0xef] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr32rcl_mask: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: {nf} shrl %cl, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xd3,0xef] +; NF-NEXT: retq # encoding: [0xc3] entry: %shamt = and i32 %cl, 31 %shr = lshr i32 %a, %shamt @@ -333,6 +506,13 @@ define i64 @shr64rcl(i64 noundef %a, i64 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-NEXT: shrq %cl, %rdi, %rax # 
encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xef] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr64rcl: +; NF: # %bb.0: # %entry +; NF-NEXT: movq %rsi, %rcx # encoding: [0x48,0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $rcx +; NF-NEXT: {nf} shrq %cl, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0xd3,0xef] +; NF-NEXT: retq # encoding: [0xc3] entry: %shr = lshr i64 %a, %cl ret i64 %shr @@ -345,6 +525,13 @@ define i64 @shr64rcl_mask(i64 noundef %a, i64 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-NEXT: shrq %cl, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xd3,0xef] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr64rcl_mask: +; NF: # %bb.0: # %entry +; NF-NEXT: movq %rsi, %rcx # encoding: [0x48,0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $rcx +; NF-NEXT: {nf} shrq %cl, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0xd3,0xef] +; NF-NEXT: retq # encoding: [0xc3] entry: %shamt = and i64 %cl, 63 %shr = lshr i64 %a, %shamt @@ -356,6 +543,11 @@ define i8 @shr8ri(i8 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrb $4, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xc0,0xef,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr8ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} shrb $4, %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xc0,0xef,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %shr = lshr i8 %a, 4 ret i8 %shr @@ -368,6 +560,13 @@ define i16 @shr16ri(i16 noundef %a) { ; CHECK-NEXT: shrl $4, %eax # EVEX TO LEGACY Compression encoding: [0xc1,0xe8,0x04] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr16ri: +; NF: # %bb.0: # %entry +; NF-NEXT: movzwl %di, %eax # encoding: [0x0f,0xb7,0xc7] +; NF-NEXT: shrl $4, %eax # EVEX TO LEGACY Compression encoding: [0xc1,0xe8,0x04] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %shr = 
lshr i16 %a, 4 ret i16 %shr @@ -378,6 +577,11 @@ define i32 @shr32ri(i32 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrl $4, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0xef,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr32ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} shrl $4, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0xc1,0xef,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %shr = lshr i32 %a, 4 ret i32 %shr @@ -388,6 +592,11 @@ define i64 @shr64ri(i64 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrq $4, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xef,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr64ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} shrq $4, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0xc1,0xef,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %shr = lshr i64 %a, 4 ret i64 %shr @@ -398,6 +607,11 @@ define void @shr8m1_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrb (%rdi) # encoding: [0xd0,0x2f] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr8m1_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: shrb (%rdi) # encoding: [0xd0,0x2f] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i8, ptr %ptr %shr = lshr i8 %a, 1 @@ -410,6 +624,11 @@ define void @shr16m1_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrw (%rdi) # encoding: [0x66,0xd1,0x2f] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr16m1_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: shrw (%rdi) # encoding: [0x66,0xd1,0x2f] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i16, ptr %ptr %shr = lshr i16 %a, 1 @@ -422,6 +641,11 @@ define void @shr32m1_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrl (%rdi) # encoding: [0xd1,0x2f] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr32m1_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: shrl (%rdi) # encoding: [0xd1,0x2f] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load 
i32, ptr %ptr %shr = lshr i32 %a, 1 @@ -434,6 +658,11 @@ define void @shr64m1_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrq (%rdi) # encoding: [0x48,0xd1,0x2f] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr64m1_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: shrq (%rdi) # encoding: [0x48,0xd1,0x2f] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i64, ptr %ptr %shr = lshr i64 %a, 1 @@ -446,6 +675,11 @@ define void @shr8mi_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrb $4, (%rdi) # encoding: [0xc0,0x2f,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr8mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: shrb $4, (%rdi) # encoding: [0xc0,0x2f,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i8, ptr %ptr %shr = lshr i8 %a, 4 @@ -458,6 +692,11 @@ define void @shr16mi_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrw $4, (%rdi) # encoding: [0x66,0xc1,0x2f,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr16mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: shrw $4, (%rdi) # encoding: [0x66,0xc1,0x2f,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i16, ptr %ptr %shr = lshr i16 %a, 4 @@ -470,6 +709,11 @@ define void @shr32mi_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrl $4, (%rdi) # encoding: [0xc1,0x2f,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr32mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: shrl $4, (%rdi) # encoding: [0xc1,0x2f,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i32, ptr %ptr %shr = lshr i32 %a, 4 @@ -482,6 +726,11 @@ define void @shr64mi_legacy(ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrq $4, (%rdi) # encoding: [0x48,0xc1,0x2f,0x04] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr64mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: shrq $4, (%rdi) # encoding: [0x48,0xc1,0x2f,0x04] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i64, ptr %ptr %shr = lshr i64 %a, 4 @@ -496,6 
+745,13 @@ define void @shr8mcl_legacy(ptr %ptr, i8 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shrb %cl, (%rdi) # encoding: [0xd2,0x2f] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr8mcl_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: shrb %cl, (%rdi) # encoding: [0xd2,0x2f] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i8, ptr %ptr %shr = lshr i8 %a, %cl @@ -510,6 +766,13 @@ define void @shr16mcl_legacy(ptr %ptr, i16 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shrw %cl, (%rdi) # encoding: [0x66,0xd3,0x2f] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr16mcl_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: shrw %cl, (%rdi) # encoding: [0x66,0xd3,0x2f] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i16, ptr %ptr %shr = lshr i16 %a, %cl @@ -524,6 +787,13 @@ define void @shr32mcl_legacy(ptr %ptr, i32 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shrl %cl, (%rdi) # encoding: [0xd3,0x2f] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr32mcl_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: movl %esi, %ecx # encoding: [0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $ecx +; NF-NEXT: shrl %cl, (%rdi) # encoding: [0xd3,0x2f] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i32, ptr %ptr %shr = lshr i32 %a, %cl @@ -538,6 +808,13 @@ define void @shr64mcl_legacy(ptr %ptr, i64 %cl) { ; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-NEXT: shrq %cl, (%rdi) # encoding: [0x48,0xd3,0x2f] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: shr64mcl_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: movq %rsi, %rcx # encoding: [0x48,0x89,0xf1] +; NF-NEXT: # kill: def $cl killed $cl killed $rcx +; NF-NEXT: shrq %cl, (%rdi) # encoding: 
[0x48,0xd3,0x2f] +; NF-NEXT: retq # encoding: [0xc3] entry: %a = load i64, ptr %ptr %shr = lshr i64 %a, %cl diff --git a/llvm/test/CodeGen/X86/apx/sub.ll b/llvm/test/CodeGen/X86/apx/sub.ll index be0914c90b9fa..a38d09587ba91 100644 --- a/llvm/test/CodeGen/X86/apx/sub.ll +++ b/llvm/test/CodeGen/X86/apx/sub.ll @@ -1,11 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs --show-mc-encoding | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd,nf -verify-machineinstrs --show-mc-encoding | FileCheck --check-prefix=NF %s define i8 @sub8rr(i8 noundef %a, i8 noundef %b) { ; CHECK-LABEL: sub8rr: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subb %sil, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x28,0xf7] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub8rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} subb %sil, %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x28,0xf7] +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = sub i8 %a, %b ret i8 %sub @@ -17,6 +23,12 @@ define i16 @sub16rr(i16 noundef %a, i16 noundef %b) { ; CHECK-NEXT: subl %esi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x29,0xf7] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub16rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} subl %esi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x29,0xf7] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = sub i16 %a, %b ret i16 %sub @@ -27,6 +39,11 @@ define i32 @sub32rr(i32 noundef %a, i32 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subl %esi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x29,0xf7] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub32rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} subl %esi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x29,0xf7] 
+; NF-NEXT: retq # encoding: [0xc3] entry: %sub = sub i32 %a, %b ret i32 %sub @@ -37,6 +54,11 @@ define i64 @sub64rr(i64 noundef %a, i64 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subq %rsi, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x29,0xf7] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub64rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} subq %rsi, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x29,0xf7] +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = sub i64 %a, %b ret i64 %sub @@ -47,6 +69,11 @@ define i8 @sub8rm(i8 noundef %a, ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subb (%rsi), %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x2a,0x3e] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub8rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} subb (%rsi), %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x2a,0x3e] +; NF-NEXT: retq # encoding: [0xc3] entry: %b = load i8, ptr %ptr %sub = sub i8 %a, %b @@ -58,6 +85,11 @@ define i16 @sub16rm(i16 noundef %a, ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subw (%rsi), %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x2b,0x3e] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub16rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} subw (%rsi), %di, %ax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7d,0x1c,0x2b,0x3e] +; NF-NEXT: retq # encoding: [0xc3] entry: %b = load i16, ptr %ptr %sub = sub i16 %a, %b @@ -69,6 +101,11 @@ define i32 @sub32rm(i32 noundef %a, ptr %ptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subl (%rsi), %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x2b,0x3e] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub32rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} subl (%rsi), %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x2b,0x3e] +; NF-NEXT: retq # encoding: [0xc3] entry: %b = load i32, ptr %ptr %sub = sub i32 %a, %b @@ -80,6 +117,11 @@ define i64 @sub64rm(i64 noundef %a, ptr %ptr) { ; CHECK: # %bb.0: # 
%entry ; CHECK-NEXT: subq (%rsi), %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x2b,0x3e] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub64rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} subq (%rsi), %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x2b,0x3e] +; NF-NEXT: retq # encoding: [0xc3] entry: %b = load i64, ptr %ptr %sub = sub i64 %a, %b @@ -92,6 +134,12 @@ define i16 @sub16ri8(i16 noundef %a) { ; CHECK-NEXT: subl $-128, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xef,0x80] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub16ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} subl $-128, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0xef,0x80] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = sub i16 %a, -128 ret i16 %sub @@ -102,6 +150,11 @@ define i32 @sub32ri8(i32 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subl $-128, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xef,0x80] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub32ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} subl $-128, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0xef,0x80] +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = sub i32 %a, -128 ret i32 %sub @@ -112,6 +165,11 @@ define i64 @sub64ri8(i64 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subq $-128, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xef,0x80] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub64ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} subq $-128, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x83,0xef,0x80] +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = sub i64 %a, -128 ret i64 %sub @@ -122,6 +180,11 @@ define i8 @sub8ri(i8 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addb $-123, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0xc7,0x85] ; 
CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub8ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addb $-123, %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x80,0xc7,0x85] +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = sub i8 %a, 123 ret i8 %sub @@ -134,6 +197,13 @@ define i16 @sub16ri(i16 noundef %a) { ; CHECK-NEXT: # imm = 0xFB2E ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub16ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addl $-1234, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xc7,0x2e,0xfb,0xff,0xff] +; NF-NEXT: # imm = 0xFB2E +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = sub i16 %a, 1234 ret i16 %sub @@ -145,6 +215,12 @@ define i32 @sub32ri(i32 noundef %a) { ; CHECK-NEXT: addl $-123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xc7,0xc0,0x1d,0xfe,0xff] ; CHECK-NEXT: # imm = 0xFFFE1DC0 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub32ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addl $-123456, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xc7,0xc0,0x1d,0xfe,0xff] +; NF-NEXT: # imm = 0xFFFE1DC0 +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = sub i32 %a, 123456 ret i32 %sub @@ -156,6 +232,12 @@ define i64 @sub64ri(i64 noundef %a) { ; CHECK-NEXT: subq $-2147483648, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0xef,0x00,0x00,0x00,0x80] ; CHECK-NEXT: # imm = 0x80000000 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub64ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} subq $-2147483648, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x81,0xef,0x00,0x00,0x00,0x80] +; NF-NEXT: # imm = 0x80000000 +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = sub i64 %a, -2147483648 ret i64 %sub @@ -166,6 +248,11 @@ define i8 @sub8mr(ptr %a, i8 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subb %sil, (%rdi), %al # 
encoding: [0x62,0xf4,0x7c,0x18,0x28,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub8mr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} subb %sil, (%rdi), %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x28,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i8, ptr %a %sub = sub nsw i8 %t, %b @@ -179,6 +266,13 @@ define i16 @sub16mr(ptr %a, i16 noundef %b) { ; CHECK-NEXT: subl %esi, %eax # EVEX TO LEGACY Compression encoding: [0x29,0xf0] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub16mr: +; NF: # %bb.0: # %entry +; NF-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] +; NF-NEXT: subl %esi, %eax # EVEX TO LEGACY Compression encoding: [0x29,0xf0] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %sub = sub nsw i16 %t, %b @@ -190,6 +284,11 @@ define i32 @sub32mr(ptr %a, i32 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subl %esi, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x29,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub32mr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} subl %esi, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x29,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %sub = sub nsw i32 %t, %b @@ -201,6 +300,11 @@ define i64 @sub64mr(ptr %a, i64 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subq %rsi, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x29,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub64mr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} subq %rsi, (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x29,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %sub = sub nsw i64 %t, %b @@ -214,6 +318,13 @@ define i16 @sub16mi8(ptr %a) { ; CHECK-NEXT: subl $-128, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xe8,0x80] ; CHECK-NEXT: # kill: def $ax killed 
$ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub16mi8: +; NF: # %bb.0: # %entry +; NF-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] +; NF-NEXT: subl $-128, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xe8,0x80] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %sub = sub nsw i16 %t, -128 @@ -225,6 +336,11 @@ define i32 @sub32mi8(ptr %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subl $-128, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0x2f,0x80] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub32mi8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} subl $-128, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0x2f,0x80] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %sub = sub nsw i32 %t, -128 @@ -236,6 +352,11 @@ define i64 @sub64mi8(ptr %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subq $-128, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0x2f,0x80] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub64mi8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} subq $-128, (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x83,0x2f,0x80] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %sub = sub nsw i64 %t, -128 @@ -247,6 +368,11 @@ define i8 @sub8mi(ptr %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addb $-123, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0x07,0x85] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub8mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addb $-123, (%rdi), %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x80,0x07,0x85] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i8, ptr %a %sub = sub nsw i8 %t, 123 @@ -261,6 +387,14 @@ define i16 @sub16mi(ptr %a) { ; CHECK-NEXT: # imm = 0xFB2E ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub16mi: +; NF: # %bb.0: # %entry +; 
NF-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] +; NF-NEXT: addl $-1234, %eax # EVEX TO LEGACY Compression encoding: [0x05,0x2e,0xfb,0xff,0xff] +; NF-NEXT: # imm = 0xFB2E +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %sub = sub nsw i16 %t, 1234 @@ -273,6 +407,12 @@ define i32 @sub32mi(ptr %a) { ; CHECK-NEXT: addl $-123456, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0x07,0xc0,0x1d,0xfe,0xff] ; CHECK-NEXT: # imm = 0xFFFE1DC0 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub32mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} addl $-123456, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0x07,0xc0,0x1d,0xfe,0xff] +; NF-NEXT: # imm = 0xFFFE1DC0 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %sub = sub nsw i32 %t, 123456 @@ -285,6 +425,12 @@ define i64 @sub64mi(ptr %a) { ; CHECK-NEXT: subq $-2147483648, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0x2f,0x00,0x00,0x00,0x80] ; CHECK-NEXT: # imm = 0x80000000 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub64mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} subq $-2147483648, (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x81,0x2f,0x00,0x00,0x00,0x80] +; NF-NEXT: # imm = 0x80000000 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %sub = sub nsw i64 %t, -2147483648 @@ -305,6 +451,15 @@ define i8 @subflag8rr(i8 noundef %a, i8 noundef %b) { ; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: subflag8rr: +; NF: # %bb.0: # %entry +; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; NF-NEXT: subb %sil, %dil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x28,0xf7] +; NF-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] +; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] +; 
NF-NEXT: # kill: def $al killed $al killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = call i8 @llvm.usub.sat.i8(i8 %a, i8 %b) ret i8 %sub @@ -318,6 +473,14 @@ define i16 @subflag16rr(i16 noundef %a, i16 noundef %b) { ; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: subflag16rr: +; NF: # %bb.0: # %entry +; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; NF-NEXT: subw %si, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x29,0xf7] +; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = call i16 @llvm.usub.sat.i16(i16 %a, i16 %b) ret i16 %sub @@ -330,6 +493,13 @@ define i32 @subflag32rr(i32 noundef %a, i32 noundef %b) { ; CHECK-NEXT: subl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x29,0xf7] ; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: subflag32rr: +; NF: # %bb.0: # %entry +; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; NF-NEXT: subl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x29,0xf7] +; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 %b) ret i32 %sub @@ -342,6 +512,13 @@ define i64 @subflag64rr(i64 noundef %a, i64 noundef %b) { ; CHECK-NEXT: subq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x29,0xf7] ; CHECK-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: subflag64rr: +; NF: # %bb.0: # %entry +; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; NF-NEXT: subq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x29,0xf7] +; NF-NEXT: cmovaeq %rcx, 
%rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1] +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = call i64 @llvm.usub.sat.i64(i64 %a, i64 %b) ret i64 %sub @@ -356,6 +533,15 @@ define i8 @subflag8rm(i8 noundef %a, ptr %b) { ; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: subflag8rm: +; NF: # %bb.0: # %entry +; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; NF-NEXT: subb (%rsi), %dil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x2a,0x3e] +; NF-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] +; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] +; NF-NEXT: # kill: def $al killed $al killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %t = load i8, ptr %b %sub = call i8 @llvm.usub.sat.i8(i8 %a, i8 %t) @@ -370,6 +556,14 @@ define i16 @subflag16rm(i16 noundef %a, ptr %b) { ; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: subflag16rm: +; NF: # %bb.0: # %entry +; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; NF-NEXT: subw (%rsi), %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x2b,0x3e] +; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %t = load i16, ptr %b %sub = call i16 @llvm.usub.sat.i16(i16 %a, i16 %t) @@ -383,6 +577,13 @@ define i32 @subflag32rm(i32 noundef %a, ptr %b) { ; CHECK-NEXT: subl (%rsi), %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x2b,0x3e] ; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: subflag32rm: +; NF: # %bb.0: # %entry +; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; NF-NEXT: 
subl (%rsi), %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x2b,0x3e] +; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] +; NF-NEXT: retq # encoding: [0xc3] entry: %t = load i32, ptr %b %sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 %t) @@ -396,6 +597,13 @@ define i64 @subflag64rm(i64 noundef %a, ptr %b) { ; CHECK-NEXT: subq (%rsi), %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x2b,0x3e] ; CHECK-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: subflag64rm: +; NF: # %bb.0: # %entry +; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; NF-NEXT: subq (%rsi), %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x2b,0x3e] +; NF-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1] +; NF-NEXT: retq # encoding: [0xc3] entry: %t = load i64, ptr %b %sub = call i64 @llvm.usub.sat.i64(i64 %a, i64 %t) @@ -410,6 +618,14 @@ define i16 @subflag16ri8(i16 noundef %a) { ; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: subflag16ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; NF-NEXT: subw $123, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xef,0x7b] +; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = call i16 @llvm.usub.sat.i16(i16 %a, i16 123) ret i16 %sub @@ -422,6 +638,13 @@ define i32 @subflag32ri8(i32 noundef %a) { ; CHECK-NEXT: subl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xef,0x7b] ; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: subflag32ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: xorl %eax, %eax # encoding: 
[0x31,0xc0] +; NF-NEXT: subl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xef,0x7b] +; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 123) ret i32 %sub @@ -434,6 +657,13 @@ define i64 @subflag64ri8(i64 noundef %a) { ; CHECK-NEXT: subq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xef,0x7b] ; CHECK-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: subflag64ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; NF-NEXT: subq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xef,0x7b] +; NF-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1] +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = call i64 @llvm.usub.sat.i64(i64 %a, i64 123) ret i64 %sub @@ -448,6 +678,15 @@ define i8 @subflag8ri(i8 noundef %a) { ; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: subflag8ri: +; NF: # %bb.0: # %entry +; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; NF-NEXT: subb $123, %dil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xef,0x7b] +; NF-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] +; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] +; NF-NEXT: # kill: def $al killed $al killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = call i8 @llvm.usub.sat.i8(i8 %a, i8 123) ret i8 %sub @@ -462,6 +701,15 @@ define i16 @subflag16ri(i16 noundef %a) { ; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: subflag16ri: +; NF: # %bb.0: # %entry +; NF-NEXT: 
xorl %eax, %eax # encoding: [0x31,0xc0] +; NF-NEXT: subw $1234, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x81,0xef,0xd2,0x04] +; NF-NEXT: # imm = 0x4D2 +; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = call i16 @llvm.usub.sat.i16(i16 %a, i16 1234) ret i16 %sub @@ -475,6 +723,14 @@ define i32 @subflag32ri(i32 noundef %a) { ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: subflag32ri: +; NF: # %bb.0: # %entry +; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; NF-NEXT: subl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xef,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1] +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 123456) ret i32 %sub @@ -488,6 +744,14 @@ define i64 @subflag64ri(i64 noundef %a) { ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: subflag64ri: +; NF: # %bb.0: # %entry +; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; NF-NEXT: subq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xef,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1] +; NF-NEXT: retq # encoding: [0xc3] entry: %sub = call i64 @llvm.usub.sat.i64(i64 %a, i64 123456) ret i64 %sub @@ -513,6 +777,22 @@ define void @sub64ri_reloc(i64 %val) { ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: .LBB41_2: # %f ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub64ri_reloc: +; NF: # %bb.0: +; NF-NEXT: cmpq $val, %rdi # encoding: [0x48,0x81,0xff,A,A,A,A] +; 
NF-NEXT: # fixup A - offset: 3, value: val, kind: reloc_signed_4byte +; NF-NEXT: jbe .LBB41_2 # encoding: [0x76,A] +; NF-NEXT: # fixup A - offset: 1, value: .LBB41_2-1, kind: FK_PCRel_1 +; NF-NEXT: # %bb.1: # %t +; NF-NEXT: pushq %rax # encoding: [0x50] +; NF-NEXT: .cfi_def_cfa_offset 16 +; NF-NEXT: callq f@PLT # encoding: [0xe8,A,A,A,A] +; NF-NEXT: # fixup A - offset: 1, value: f@PLT-4, kind: FK_PCRel_4 +; NF-NEXT: popq %rax # encoding: [0x58] +; NF-NEXT: .cfi_def_cfa_offset 8 +; NF-NEXT: .LBB41_2: # %f +; NF-NEXT: retq # encoding: [0xc3] %cmp = icmp ugt i64 %val, ptrtoint (ptr @val to i64) br i1 %cmp, label %t, label %f @@ -529,6 +809,11 @@ define void @sub8mr_legacy(ptr %a, i8 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subb %sil, (%rdi) # encoding: [0x40,0x28,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub8mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: subb %sil, (%rdi) # encoding: [0x40,0x28,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i8, ptr %a %sub = sub i8 %t, %b @@ -541,6 +826,11 @@ define void @sub16mr_legacy(ptr %a, i16 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subw %si, (%rdi) # encoding: [0x66,0x29,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub16mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: subw %si, (%rdi) # encoding: [0x66,0x29,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %sub = sub i16 %t, %b @@ -553,6 +843,11 @@ define void @sub32mr_legacy(ptr %a, i32 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subl %esi, (%rdi) # encoding: [0x29,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub32mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: subl %esi, (%rdi) # encoding: [0x29,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %sub = sub i32 %t, %b @@ -565,6 +860,11 @@ define void @sub64mr_legacy(ptr %a, i64 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subq %rsi, (%rdi) # encoding: [0x48,0x29,0x37] ; CHECK-NEXT: 
retq # encoding: [0xc3] +; +; NF-LABEL: sub64mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: subq %rsi, (%rdi) # encoding: [0x48,0x29,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %sub = sub i64 %t, %b @@ -577,6 +877,11 @@ define void @sub8mi_legacy(ptr %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addb $-123, (%rdi) # encoding: [0x80,0x07,0x85] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub8mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: addb $-123, (%rdi) # encoding: [0x80,0x07,0x85] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i8, ptr %a %sub = sub nsw i8 %t, 123 @@ -590,6 +895,12 @@ define void @sub16mi_legacy(ptr %a) { ; CHECK-NEXT: addw $-1234, (%rdi) # encoding: [0x66,0x81,0x07,0x2e,0xfb] ; CHECK-NEXT: # imm = 0xFB2E ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub16mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: addw $-1234, (%rdi) # encoding: [0x66,0x81,0x07,0x2e,0xfb] +; NF-NEXT: # imm = 0xFB2E +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %sub = sub nsw i16 %t, 1234 @@ -603,6 +914,12 @@ define void @sub32mi_legacy(ptr %a) { ; CHECK-NEXT: addl $-123456, (%rdi) # encoding: [0x81,0x07,0xc0,0x1d,0xfe,0xff] ; CHECK-NEXT: # imm = 0xFFFE1DC0 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub32mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: addl $-123456, (%rdi) # encoding: [0x81,0x07,0xc0,0x1d,0xfe,0xff] +; NF-NEXT: # imm = 0xFFFE1DC0 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %sub = sub nsw i32 %t, 123456 @@ -616,6 +933,12 @@ define void @sub64mi_legacy(ptr %a) { ; CHECK-NEXT: addq $-123456, (%rdi) # encoding: [0x48,0x81,0x07,0xc0,0x1d,0xfe,0xff] ; CHECK-NEXT: # imm = 0xFFFE1DC0 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: sub64mi_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: addq $-123456, (%rdi) # encoding: [0x48,0x81,0x07,0xc0,0x1d,0xfe,0xff] +; NF-NEXT: # imm = 0xFFFE1DC0 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %sub = sub nsw 
i64 %t, 123456 diff --git a/llvm/test/CodeGen/X86/apx/xor.ll b/llvm/test/CodeGen/X86/apx/xor.ll index d203fbb02782a..436b16b4292df 100644 --- a/llvm/test/CodeGen/X86/apx/xor.ll +++ b/llvm/test/CodeGen/X86/apx/xor.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs --show-mc-encoding | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd,nf -verify-machineinstrs --show-mc-encoding | FileCheck --check-prefix=NF %s define i8 @xor8rr(i8 noundef %a, i8 noundef %b) { ; CHECK-LABEL: xor8rr: @@ -7,6 +8,12 @@ define i8 @xor8rr(i8 noundef %a, i8 noundef %b) { ; CHECK-NEXT: xorl %esi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x31,0xf7] ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor8rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorl %esi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x31,0xf7] +; NF-NEXT: # kill: def $al killed $al killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %xor = xor i8 %a, %b ret i8 %xor @@ -18,6 +25,12 @@ define i16 @xor16rr(i16 noundef %a, i16 noundef %b) { ; CHECK-NEXT: xorl %esi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x31,0xf7] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor16rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorl %esi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x31,0xf7] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %xor = xor i16 %a, %b ret i16 %xor @@ -28,6 +41,11 @@ define i32 @xor32rr(i32 noundef %a, i32 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorl %esi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x31,0xf7] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor32rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorl %esi, %edi, %eax # EVEX TO EVEX 
Compression encoding: [0x62,0xf4,0x7c,0x1c,0x31,0xf7] +; NF-NEXT: retq # encoding: [0xc3] entry: %xor = xor i32 %a, %b ret i32 %xor @@ -38,6 +56,11 @@ define i64 @xor64rr(i64 noundef %a, i64 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorq %rsi, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x31,0xf7] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor64rr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorq %rsi, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x31,0xf7] +; NF-NEXT: retq # encoding: [0xc3] entry: %xor = xor i64 %a, %b ret i64 %xor @@ -48,6 +71,11 @@ define i8 @xor8rm(i8 noundef %a, ptr %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorb (%rsi), %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x32,0x3e] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor8rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorb (%rsi), %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x32,0x3e] +; NF-NEXT: retq # encoding: [0xc3] entry: %t = load i8, ptr %b %xor = xor i8 %a, %t @@ -59,6 +87,11 @@ define i16 @xor16rm(i16 noundef %a, ptr %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorw (%rsi), %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x33,0x3e] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor16rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorw (%rsi), %di, %ax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7d,0x1c,0x33,0x3e] +; NF-NEXT: retq # encoding: [0xc3] entry: %t = load i16, ptr %b %xor = xor i16 %a, %t @@ -70,6 +103,11 @@ define i32 @xor32rm(i32 noundef %a, ptr %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorl (%rsi), %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x33,0x3e] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor32rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorl (%rsi), %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x33,0x3e] +; NF-NEXT: retq # encoding: [0xc3] entry: %t = load i32, ptr %b %xor = xor i32 %a, %t @@ -81,6 +119,11 @@ define i64 @xor64rm(i64 
noundef %a, ptr %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorq (%rsi), %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x33,0x3e] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor64rm: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorq (%rsi), %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x33,0x3e] +; NF-NEXT: retq # encoding: [0xc3] entry: %t = load i64, ptr %b %xor = xor i64 %a, %t @@ -93,6 +136,12 @@ define i16 @xor16ri8(i16 noundef %a) { ; CHECK-NEXT: xorl $123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xf7,0x7b] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor16ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorl $123, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0xf7,0x7b] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %xor = xor i16 %a, 123 ret i16 %xor @@ -103,6 +152,11 @@ define i32 @xor32ri8(i32 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorl $123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xf7,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor32ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorl $123, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0xf7,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %xor = xor i32 %a, 123 ret i32 %xor @@ -113,6 +167,11 @@ define i64 @xor64ri8(i64 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorq $123, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xf7,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor64ri8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorq $123, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x83,0xf7,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %xor = xor i64 %a, 123 ret i64 %xor @@ -123,6 +182,11 @@ define i8 @xor8ri(i8 noundef %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorb $123, %dil, %al # encoding: 
[0x62,0xf4,0x7c,0x18,0x80,0xf7,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor8ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorb $123, %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x80,0xf7,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %xor = xor i8 %a, 123 ret i8 %xor @@ -135,6 +199,13 @@ define i16 @xor16ri(i16 noundef %a) { ; CHECK-NEXT: # imm = 0x4D2 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor16ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorl $1234, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xf7,0xd2,0x04,0x00,0x00] +; NF-NEXT: # imm = 0x4D2 +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %xor = xor i16 %a, 1234 ret i16 %xor @@ -146,6 +217,12 @@ define i32 @xor32ri(i32 noundef %a) { ; CHECK-NEXT: xorl $123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xf7,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor32ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorl $123456, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xf7,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %xor = xor i32 %a, 123456 ret i32 %xor @@ -157,6 +234,12 @@ define i64 @xor64ri(i64 noundef %a) { ; CHECK-NEXT: xorq $123456, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0xf7,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor64ri: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorq $123456, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x81,0xf7,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %xor = xor i64 %a, 123456 ret i64 %xor @@ -167,6 +250,11 @@ define i8 @xor8mr(ptr %a, i8 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorb %sil, (%rdi), %al # 
encoding: [0x62,0xf4,0x7c,0x18,0x30,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor8mr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorb %sil, (%rdi), %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x30,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i8, ptr %a %xor = xor i8 %t, %b @@ -178,6 +266,11 @@ define i16 @xor16mr(ptr %a, i16 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorw %si, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0x31,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor16mr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorw %si, (%rdi), %ax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7d,0x1c,0x31,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %xor = xor i16 %t, %b @@ -189,6 +282,11 @@ define i32 @xor32mr(ptr %a, i32 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorl %esi, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x31,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor32mr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorl %esi, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x31,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %xor = xor i32 %t, %b @@ -200,6 +298,11 @@ define i64 @xor64mr(ptr %a, i64 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorq %rsi, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x31,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor64mr: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorq %rsi, (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x31,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %xor = xor i64 %t, %b @@ -213,6 +316,13 @@ define i16 @xor16mi8(ptr %a) { ; CHECK-NEXT: xorl $123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xf0,0x7b] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor16mi8: +; NF: # %bb.0: # %entry +; NF-NEXT: movzwl 
(%rdi), %eax # encoding: [0x0f,0xb7,0x07] +; NF-NEXT: xorl $123, %eax # EVEX TO LEGACY Compression encoding: [0x83,0xf0,0x7b] +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %xor = xor i16 %t, 123 @@ -224,6 +334,11 @@ define i32 @xor32mi8(ptr %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorl $123, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0x37,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor32mi8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorl $123, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0x37,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %xor = xor i32 %t, 123 @@ -235,6 +350,11 @@ define i64 @xor64mi8(ptr %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorq $123, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0x37,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor64mi8: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorq $123, (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x83,0x37,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %xor = xor i64 %t, 123 @@ -246,6 +366,11 @@ define i8 @xor8mi(ptr %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorb $123, (%rdi), %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0x37,0x7b] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor8mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorb $123, (%rdi), %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x80,0x37,0x7b] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i8, ptr %a %xor = xor i8 %t, 123 @@ -260,6 +385,14 @@ define i16 @xor16mi(ptr %a) { ; CHECK-NEXT: # imm = 0x4D2 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor16mi: +; NF: # %bb.0: # %entry +; NF-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] +; NF-NEXT: xorl $1234, %eax # EVEX TO LEGACY Compression encoding: 
[0x35,0xd2,0x04,0x00,0x00] +; NF-NEXT: # imm = 0x4D2 +; NF-NEXT: # kill: def $ax killed $ax killed $eax +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %xor = xor i16 %t, 1234 @@ -272,6 +405,12 @@ define i32 @xor32mi(ptr %a) { ; CHECK-NEXT: xorl $123456, (%rdi), %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0x37,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor32mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorl $123456, (%rdi), %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0x37,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %xor = xor i32 %t, 123456 @@ -284,6 +423,12 @@ define i64 @xor64mi(ptr %a) { ; CHECK-NEXT: xorq $123456, (%rdi), %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0x37,0x40,0xe2,0x01,0x00] ; CHECK-NEXT: # imm = 0x1E240 ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor64mi: +; NF: # %bb.0: # %entry +; NF-NEXT: {nf} xorq $123456, (%rdi), %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x81,0x37,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %xor = xor i64 %t, 123456 @@ -301,6 +446,15 @@ define i1 @xorflag8rr(i8 %a, i8 %b) { ; CHECK-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xorflag8rr: +; NF: # %bb.0: +; NF-NEXT: {nf} xorl %edi, %esi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x31,0xfe] +; NF-NEXT: xorb $-1, %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xf0,0xff] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %xor = xor i8 %b, -1 %v0 = xor i8 %a, %xor ; 0xff << 50 %v1 = 
icmp eq i8 %v0, 0 @@ -317,6 +471,15 @@ define i1 @xorflag16rr(i16 %a, i16 %b) { ; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xorflag16rr: +; NF: # %bb.0: +; NF-NEXT: {nf} xorl %edi, %esi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x31,0xfe] +; NF-NEXT: xorw $-1, %ax, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xf0,0xff] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %xor = xor i16 %b, -1 %v0 = xor i16 %a, %xor ; 0xff << 50 %v1 = icmp eq i16 %v0, 0 @@ -332,6 +495,14 @@ define i1 @xorflag32rr(i32 %a, i32 %b) { ; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xorflag32rr: +; NF: # %bb.0: +; NF-NEXT: xorl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x31,0xf7] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %v0 = xor i32 %a, %b ; 0xff << 50 %v1 = icmp eq i32 %v0, 0 store i32 %v0, ptr @d64 @@ -346,6 +517,14 @@ define i1 @xorflag64rr(i64 %a, i64 %b) { ; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xorflag64rr: +; NF: # %bb.0: +; NF-NEXT: xorq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x31,0xf7] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - 
offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %v0 = xor i64 %a, %b ; 0xff << 50 %v1 = icmp eq i64 %v0, 0 store i64 %v0, ptr @d64 @@ -361,6 +540,15 @@ define i1 @xorflag8rm(ptr %ptr, i8 %b) { ; CHECK-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xorflag8rm: +; NF: # %bb.0: +; NF-NEXT: {nf} xorb (%rdi), %sil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x32,0x37] +; NF-NEXT: xorb $-1, %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xf0,0xff] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %a = load i8, ptr %ptr %xor = xor i8 %b, -1 %v0 = xor i8 %a, %xor ; 0xff << 50 @@ -378,6 +566,15 @@ define i1 @xorflag16rm(ptr %ptr, i16 %b) { ; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xorflag16rm: +; NF: # %bb.0: +; NF-NEXT: {nf} xorw (%rdi), %si, %ax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7d,0x1c,0x33,0x37] +; NF-NEXT: xorw $-1, %ax, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xf0,0xff] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %a = load i16, ptr %ptr %xor = xor i16 %b, -1 %v0 = xor i16 %a, %xor ; 0xff << 50 @@ -394,6 +591,14 @@ define i1 @xorflag32rm(ptr %ptr, i32 %b) { ; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; 
NF-LABEL: xorflag32rm: +; NF: # %bb.0: +; NF-NEXT: xorl (%rdi), %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x33,0x37] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %a = load i32, ptr %ptr %v0 = xor i32 %a, %b ; 0xff << 50 %v1 = icmp eq i32 %v0, 0 @@ -409,6 +614,14 @@ define i1 @xorflag64rm(ptr %ptr, i64 %b) { ; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xorflag64rm: +; NF: # %bb.0: +; NF-NEXT: xorq (%rdi), %rsi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x33,0x37] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %a = load i64, ptr %ptr %v0 = xor i64 %a, %b ; 0xff << 50 %v1 = icmp eq i64 %v0, 0 @@ -424,6 +637,14 @@ define i1 @xorflag8ri(i8 %a) { ; CHECK-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xorflag8ri: +; NF: # %bb.0: +; NF-NEXT: xorb $-124, %dil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xf7,0x84] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %xor = xor i8 123, -1 %v0 = xor i8 %a, %xor ; 0xff << 50 %v1 = icmp eq i8 %v0, 0 @@ -440,6 +661,15 @@ define i1 @xorflag16ri(i16 %a) { ; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq 
# encoding: [0xc3] +; +; NF-LABEL: xorflag16ri: +; NF: # %bb.0: +; NF-NEXT: xorw $-1235, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x81,0xf7,0x2d,0xfb] +; NF-NEXT: # imm = 0xFB2D +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %xor = xor i16 1234, -1 %v0 = xor i16 %a, %xor ; 0xff << 50 %v1 = icmp eq i16 %v0, 0 @@ -456,6 +686,15 @@ define i1 @xorflag32ri(i32 %a) { ; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xorflag32ri: +; NF: # %bb.0: +; NF-NEXT: xorl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xf7,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %v0 = xor i32 %a, 123456 ; 0xff << 50 %v1 = icmp eq i32 %v0, 0 store i32 %v0, ptr @d64 @@ -471,6 +710,15 @@ define i1 @xorflag64ri(i64 %a) { ; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xorflag64ri: +; NF: # %bb.0: +; NF-NEXT: xorq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xf7,0x40,0xe2,0x01,0x00] +; NF-NEXT: # imm = 0x1E240 +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %v0 = xor i64 %a, 123456 ; 0xff << 50 %v1 = icmp eq i64 %v0, 0 store i64 %v0, ptr @d64 @@ -485,6 +733,14 @@ define i1 @xorflag16ri8(i16 %a) { 
; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xorflag16ri8: +; NF: # %bb.0: +; NF-NEXT: xorw $-124, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xf7,0x84] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %xor = xor i16 123, -1 %v0 = xor i16 %a, %xor ; 0xff << 50 %v1 = icmp eq i16 %v0, 0 @@ -500,6 +756,14 @@ define i1 @xorflag32ri8(i32 %a) { ; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xorflag32ri8: +; NF: # %bb.0: +; NF-NEXT: xorl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xf7,0x7b] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %v0 = xor i32 %a, 123 ; 0xff << 50 %v1 = icmp eq i32 %v0, 0 store i32 %v0, ptr @d64 @@ -514,6 +778,14 @@ define i1 @xorflag64ri8(i64 %a) { ; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xorflag64ri8: +; NF: # %bb.0: +; NF-NEXT: xorq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xf7,0x7b] +; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A] +; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte +; NF-NEXT: retq # encoding: [0xc3] %v0 = xor i64 %a, 123 ; 0xff << 50 %v1 = icmp eq i64 %v0, 0 store i64 %v0, ptr @d64 @@ -525,6 
+797,11 @@ define void @xor8mr_legacy(ptr %a, i8 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorb %sil, (%rdi) # encoding: [0x40,0x30,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor8mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: xorb %sil, (%rdi) # encoding: [0x40,0x30,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i8, ptr %a %xor = xor i8 %t, %b @@ -537,6 +814,11 @@ define void @xor16mr_legacy(ptr %a, i16 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorw %si, (%rdi) # encoding: [0x66,0x31,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor16mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: xorw %si, (%rdi) # encoding: [0x66,0x31,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i16, ptr %a %xor = xor i16 %t, %b @@ -549,6 +831,11 @@ define void @xor32mr_legacy(ptr %a, i32 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorl %esi, (%rdi) # encoding: [0x31,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor32mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: xorl %esi, (%rdi) # encoding: [0x31,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i32, ptr %a %xor = xor i32 %t, %b @@ -561,6 +848,11 @@ define void @xor64mr_legacy(ptr %a, i64 noundef %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorq %rsi, (%rdi) # encoding: [0x48,0x31,0x37] ; CHECK-NEXT: retq # encoding: [0xc3] +; +; NF-LABEL: xor64mr_legacy: +; NF: # %bb.0: # %entry +; NF-NEXT: xorq %rsi, (%rdi) # encoding: [0x48,0x31,0x37] +; NF-NEXT: retq # encoding: [0xc3] entry: %t= load i64, ptr %a %xor = xor i64 %t, %b diff --git a/llvm/test/CodeGen/X86/atomic-non-integer.ll b/llvm/test/CodeGen/X86/atomic-non-integer.ll index 9995e7d3a4d31..d7633cb11e44c 100644 --- a/llvm/test/CodeGen/X86/atomic-non-integer.ll +++ b/llvm/test/CodeGen/X86/atomic-non-integer.ll @@ -787,3 +787,100 @@ define double @load_double_seq_cst(ptr %fptr) { %v = load atomic double, ptr %fptr seq_cst, align 8 ret double %v } + +define void 
@store_bfloat(ptr %fptr, bfloat %v) { +; X86-LABEL: store_bfloat: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movw %cx, (%eax) +; X86-NEXT: retl +; +; X64-SSE-LABEL: store_bfloat: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: pextrw $0, %xmm0, %eax +; X64-SSE-NEXT: movw %ax, (%rdi) +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: store_bfloat: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vpextrw $0, %xmm0, %eax +; X64-AVX-NEXT: movw %ax, (%rdi) +; X64-AVX-NEXT: retq + store atomic bfloat %v, ptr %fptr unordered, align 2 + ret void +} + +; Work around issue #92899 by casting to float +define float @load_bfloat(ptr %fptr) { +; X86-SSE1-LABEL: load_bfloat: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %eax +; X86-SSE1-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movzwl (%eax), %eax +; X86-SSE1-NEXT: shll $16, %eax +; X86-SSE1-NEXT: movl %eax, (%esp) +; X86-SSE1-NEXT: flds (%esp) +; X86-SSE1-NEXT: popl %eax +; X86-SSE1-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE1-NEXT: retl +; +; X86-SSE2-LABEL: load_bfloat: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movzwl (%eax), %eax +; X86-SSE2-NEXT: shll $16, %eax +; X86-SSE2-NEXT: movd %eax, %xmm0 +; X86-SSE2-NEXT: movd %xmm0, (%esp) +; X86-SSE2-NEXT: flds (%esp) +; X86-SSE2-NEXT: popl %eax +; X86-SSE2-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: load_bfloat: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %eax +; X86-AVX-NEXT: .cfi_def_cfa_offset 8 +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX-NEXT: movzwl (%eax), %eax +; X86-AVX-NEXT: shll $16, %eax +; X86-AVX-NEXT: vmovd %eax, %xmm0 +; X86-AVX-NEXT: vmovd %xmm0, (%esp) +; X86-AVX-NEXT: flds (%esp) +; X86-AVX-NEXT: popl %eax +; X86-AVX-NEXT: .cfi_def_cfa_offset 4 +; X86-AVX-NEXT: retl +; +; X86-NOSSE-LABEL: load_bfloat: +; X86-NOSSE: # %bb.0: +; 
X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8 +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movzwl (%eax), %eax +; X86-NOSSE-NEXT: shll $16, %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: flds (%esp) +; X86-NOSSE-NEXT: popl %eax +; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4 +; X86-NOSSE-NEXT: retl +; +; X64-SSE-LABEL: load_bfloat: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movzwl (%rdi), %eax +; X64-SSE-NEXT: shll $16, %eax +; X64-SSE-NEXT: movd %eax, %xmm0 +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: load_bfloat: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: movzwl (%rdi), %eax +; X64-AVX-NEXT: shll $16, %eax +; X64-AVX-NEXT: vmovd %eax, %xmm0 +; X64-AVX-NEXT: retq + %v = load atomic bfloat, ptr %fptr unordered, align 2 + %ext = fpext bfloat %v to float + ret float %ext +} diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll index c981d973fef3e..bad0b411f68a9 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=ALL,AVX512F -; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw,+avx512dq | FileCheck %s --check-prefixes=ALL,AVX512BW +; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=ALL,SLOW,AVX512F +; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw,+avx512dq | FileCheck %s --check-prefixes=ALL,SLOW,AVX512BW +; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f,+avx512dq,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,FAST,AVX512F +; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw,+avx512dq,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,FAST,AVX512BW target triple = "x86_64-unknown-unknown" @@ -14,21 +16,33 @@ define 
<16 x float> @shuffle_v16f32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00 } define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08(<16 x float> %a, <16 x float> %b) { -; ALL-LABEL: shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08: -; ALL: # %bb.0: -; ALL-NEXT: vextractf32x4 $2, %zmm0, %xmm0 -; ALL-NEXT: vbroadcastss %xmm0, %zmm0 -; ALL-NEXT: retq +; SLOW-LABEL: shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08: +; SLOW: # %bb.0: +; SLOW-NEXT: vextractf32x4 $2, %zmm0, %xmm0 +; SLOW-NEXT: vbroadcastss %xmm0, %zmm0 +; SLOW-NEXT: retq +; +; FAST-LABEL: shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08: +; FAST: # %bb.0: +; FAST-NEXT: vbroadcastss {{.*#+}} zmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; FAST-NEXT: vpermps %zmm0, %zmm1, %zmm0 +; FAST-NEXT: retq %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> ret <16 x float> %shuffle } define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc(<16 x i32> %a, <16 x i32> %b) { -; ALL-LABEL: shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc: -; ALL: # %bb.0: -; ALL-NEXT: vextractf32x4 $2, %zmm0, %xmm0 -; ALL-NEXT: vbroadcastss %xmm0, %zmm0 -; ALL-NEXT: retq +; SLOW-LABEL: shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc: +; SLOW: # %bb.0: +; SLOW-NEXT: vextractf32x4 $2, %zmm0, %xmm0 +; SLOW-NEXT: vbroadcastss %xmm0, %zmm0 +; SLOW-NEXT: retq +; +; FAST-LABEL: shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc: +; FAST: # %bb.0: +; FAST-NEXT: vbroadcastss {{.*#+}} zmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; FAST-NEXT: vpermps %zmm0, %zmm1, %zmm0 +; FAST-NEXT: retq %tmp0 = bitcast <16 x i32> %a to <16 x float> %tmp1 = bitcast <16 x i32> %b to <16 x float> %shuffle = shufflevector <16 x float> %tmp0, <16 x float> %tmp1, <16 x i32> @@ -196,11 +210,20 @@ define <16 x float> @shuffle_f32_v16f32_00_08_01_09_02_10_03_11_04_12_05_13_06_1 ; PR86076 define <16 x float> 
@shuffle_f32_v16f32_00_08_00_08_00_08_00_08_00_08_00_08_00_08_00_08(float %a0, float %a1) { -; ALL-LABEL: shuffle_f32_v16f32_00_08_00_08_00_08_00_08_00_08_00_08_00_08_00_08: -; ALL: # %bb.0: -; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero -; ALL-NEXT: vbroadcastsd %xmm0, %zmm0 -; ALL-NEXT: retq +; SLOW-LABEL: shuffle_f32_v16f32_00_08_00_08_00_08_00_08_00_08_00_08_00_08_00_08: +; SLOW: # %bb.0: +; SLOW-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero +; SLOW-NEXT: vbroadcastsd %xmm0, %zmm0 +; SLOW-NEXT: retq +; +; FAST-LABEL: shuffle_f32_v16f32_00_08_00_08_00_08_00_08_00_08_00_08_00_08_00_08: +; FAST: # %bb.0: +; FAST-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; FAST-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; FAST-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0,16,0,16,0,16,0,16] +; FAST-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0 +; FAST-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 +; FAST-NEXT: retq %v0 = insertelement <8 x float> poison, float %a0, i64 0 %v1 = insertelement <8 x float> poison, float %a1, i64 0 %sv = shufflevector <8 x float> %v0, <8 x float> %v1, <16 x i32> @@ -217,11 +240,17 @@ define <16 x i32> @shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0 } define <16 x i32> @shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04(<16 x i32> %a, <16 x i32> %b) { -; ALL-LABEL: shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04: -; ALL: # %bb.0: -; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0 -; ALL-NEXT: vbroadcastss %xmm0, %zmm0 -; ALL-NEXT: retq +; SLOW-LABEL: shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04: +; SLOW: # %bb.0: +; SLOW-NEXT: vextractf128 $1, %ymm0, %xmm0 +; SLOW-NEXT: vbroadcastss %xmm0, %zmm0 +; SLOW-NEXT: retq +; +; FAST-LABEL: shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04: +; FAST: # %bb.0: +; FAST-NEXT: vbroadcastss {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] +; FAST-NEXT: vpermps %zmm0, %zmm1, %zmm0 +; FAST-NEXT: retq %shuffle = shufflevector <16 x i32> 
%a, <16 x i32> %b, <16 x i32> ret <16 x i32> %shuffle } @@ -302,21 +331,33 @@ define <16 x float> @shuffle_v16f32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08 ; PR46249 define <16 x i32> @shuffle_v16i32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04(<16 x i32> %a) { -; ALL-LABEL: shuffle_v16i32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04: -; ALL: # %bb.0: -; ALL-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] -; ALL-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3] -; ALL-NEXT: retq +; SLOW-LABEL: shuffle_v16i32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04: +; SLOW: # %bb.0: +; SLOW-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; SLOW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3] +; SLOW-NEXT: retq +; +; FAST-LABEL: shuffle_v16i32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04: +; FAST: # %bb.0: +; FAST-NEXT: vmovaps {{.*#+}} zmm1 = [11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4] +; FAST-NEXT: vpermps %zmm0, %zmm1, %zmm0 +; FAST-NEXT: retq %1 = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> ret <16 x i32> %1 } define <16 x float> @shuffle_v16f32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04(<16 x float> %a) { -; ALL-LABEL: shuffle_v16f32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04: -; ALL: # %bb.0: -; ALL-NEXT: vshufps {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] -; ALL-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3] -; ALL-NEXT: retq +; SLOW-LABEL: shuffle_v16f32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04: +; SLOW: # %bb.0: +; SLOW-NEXT: vshufps {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; SLOW-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3] +; SLOW-NEXT: retq +; +; FAST-LABEL: shuffle_v16f32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04: +; FAST: # %bb.0: +; FAST-NEXT: vmovaps {{.*#+}} zmm1 = [11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4] +; FAST-NEXT: vpermps %zmm0, %zmm1, %zmm0 +; FAST-NEXT: retq %1 = shufflevector <16 x float> 
%a, <16 x float> undef, <16 x i32> ret <16 x float> %1 } @@ -333,11 +374,17 @@ define <16 x float> @shuffle_v16f32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_ } define <16 x float> @shuffle_v16f32_load_08_11_10_00_12_15_14_04(<16 x float> %a0, ptr %a1) { -; ALL-LABEL: shuffle_v16f32_load_08_11_10_00_12_15_14_04: -; ALL: # %bb.0: -; ALL-NEXT: vshufps {{.*#+}} zmm1 = zmm0[2,0],mem[0,0],zmm0[6,4],mem[4,4],zmm0[10,8],mem[8,8],zmm0[14,12],mem[12,12] -; ALL-NEXT: vshufps {{.*#+}} zmm0 = zmm0[0,3],zmm1[0,2],zmm0[4,7],zmm1[4,6],zmm0[8,11],zmm1[8,10],zmm0[12,15],zmm1[12,14] -; ALL-NEXT: retq +; SLOW-LABEL: shuffle_v16f32_load_08_11_10_00_12_15_14_04: +; SLOW: # %bb.0: +; SLOW-NEXT: vshufps {{.*#+}} zmm1 = zmm0[2,0],mem[0,0],zmm0[6,4],mem[4,4],zmm0[10,8],mem[8,8],zmm0[14,12],mem[12,12] +; SLOW-NEXT: vshufps {{.*#+}} zmm0 = zmm0[0,3],zmm1[0,2],zmm0[4,7],zmm1[4,6],zmm0[8,11],zmm1[8,10],zmm0[12,15],zmm1[12,14] +; SLOW-NEXT: retq +; +; FAST-LABEL: shuffle_v16f32_load_08_11_10_00_12_15_14_04: +; FAST: # %bb.0: +; FAST-NEXT: vmovaps {{.*#+}} zmm1 = [0,3,2,16,4,7,6,20,8,11,10,24,12,15,14,28] +; FAST-NEXT: vpermt2ps (%rdi), %zmm1, %zmm0 +; FAST-NEXT: retq %1 = load <16 x float>, ptr %a1 %2 = shufflevector <16 x float> %1, <16 x float> %a0, <16 x i32> ret <16 x float> %2 @@ -365,26 +412,41 @@ define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a ;FIXME: can do better with vpcompress define <8 x i32> @test_v16i32_1_3_5_7_9_11_13_15(<16 x i32> %v) { -; ALL-LABEL: test_v16i32_1_3_5_7_9_11_13_15: -; ALL: # %bb.0: -; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1 -; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] -; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] -; ALL-NEXT: retq +; SLOW-LABEL: test_v16i32_1_3_5_7_9_11_13_15: +; SLOW: # %bb.0: +; SLOW-NEXT: vextractf64x4 $1, %zmm0, %ymm1 +; SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] +; SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] +; SLOW-NEXT: retq +; +; 
FAST-LABEL: test_v16i32_1_3_5_7_9_11_13_15: +; FAST: # %bb.0: +; FAST-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,5,7,9,11,13,15] +; FAST-NEXT: vpermps %zmm0, %zmm1, %zmm0 +; FAST-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; FAST-NEXT: retq %res = shufflevector <16 x i32> %v, <16 x i32> undef, <8 x i32> ret <8 x i32> %res } ;FIXME: can do better with vpcompress define <4 x i32> @test_v16i32_0_1_2_12 (<16 x i32> %v) { -; ALL-LABEL: test_v16i32_0_1_2_12: -; ALL: # %bb.0: -; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1 -; ALL-NEXT: vextractf128 $1, %ymm1, %xmm1 -; ALL-NEXT: vbroadcastss %xmm1, %xmm1 -; ALL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3] -; ALL-NEXT: vzeroupper -; ALL-NEXT: retq +; SLOW-LABEL: test_v16i32_0_1_2_12: +; SLOW: # %bb.0: +; SLOW-NEXT: vextractf64x4 $1, %zmm0, %ymm1 +; SLOW-NEXT: vextractf128 $1, %ymm1, %xmm1 +; SLOW-NEXT: vbroadcastss %xmm1, %xmm1 +; SLOW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3] +; SLOW-NEXT: vzeroupper +; SLOW-NEXT: retq +; +; FAST-LABEL: test_v16i32_0_1_2_12: +; FAST: # %bb.0: +; FAST-NEXT: vmovaps {{.*#+}} xmm1 = [0,1,2,12] +; FAST-NEXT: vpermps %zmm0, %zmm1, %zmm0 +; FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; FAST-NEXT: vzeroupper +; FAST-NEXT: retq %res = shufflevector <16 x i32> %v, <16 x i32> undef, <4 x i32> ret <4 x i32> %res } @@ -568,11 +630,18 @@ define <16 x i32> @shuffle_v8i32_17_16_01_00_21_20_05_04_25_24_09_08_29_28_13_12 } define <16 x float> @shuffle_v8f32_v16f32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04(<8 x float> %a) { -; ALL-LABEL: shuffle_v8f32_v16f32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04: -; ALL: # %bb.0: -; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0 -; ALL-NEXT: vbroadcastss %xmm0, %zmm0 -; ALL-NEXT: retq +; SLOW-LABEL: shuffle_v8f32_v16f32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04: +; SLOW: # %bb.0: +; SLOW-NEXT: vextractf128 $1, %ymm0, %xmm0 +; SLOW-NEXT: vbroadcastss %xmm0, %zmm0 +; SLOW-NEXT: retq +; +; FAST-LABEL: 
shuffle_v8f32_v16f32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04: +; FAST: # %bb.0: +; FAST-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; FAST-NEXT: vbroadcastss {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] +; FAST-NEXT: vpermps %zmm0, %zmm1, %zmm0 +; FAST-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <16 x i32> ret <16 x float> %shuffle } diff --git a/llvm/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_xsec_branch.s b/llvm/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_xsec_branch.s new file mode 100644 index 0000000000000..fd04f569526b9 --- /dev/null +++ b/llvm/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_xsec_branch.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj -o %t %s +# RUN: llvm-rtdyld -triple=arm64-none-linux-gnu -verify -check=%s %t + +.globl _main +.weak _label1 + +.section .text.label1,"ax" +_label1: + nop + +.section .text.main,"ax" +_main: + b _label1 + +# Branch must be to stub in .text.main, *not* back to _label1, because +# in general sections could be loaded at arbitrary addresses in target memory, +# and when initially processing locations and generating stubs we don't know +# the final layout yet, so we can't tell if the branch offset is within range. 
+ +# rtdyld-check: *{4}(_main) = 0x14000001 diff --git a/llvm/test/MC/AMDGPU/hsa-diag-v4.s b/llvm/test/MC/AMDGPU/hsa-diag-v4.s index 069b71b7229cd..cc10d3400e9b1 100644 --- a/llvm/test/MC/AMDGPU/hsa-diag-v4.s +++ b/llvm/test/MC/AMDGPU/hsa-diag-v4.s @@ -54,7 +54,7 @@ // GCN-LABEL: warning: test_amdhsa_group_segment_fixed_size_repeated // AMDHSA: error: .amdhsa_ directives cannot be repeated -// NONAMDHSA-: error: unknown directive +// NONAMDHSA: error: unknown directive .warning "test_amdhsa_group_segment_fixed_size_repeated" .amdhsa_kernel test_amdhsa_group_segment_fixed_size_repeated .amdhsa_group_segment_fixed_size 1 diff --git a/llvm/test/MC/AsmParser/macro-at-pseudo-variable.s b/llvm/test/MC/AsmParser/macro-at-pseudo-variable.s index a083b17aa54fe..e1bb229804209 100644 --- a/llvm/test/MC/AsmParser/macro-at-pseudo-variable.s +++ b/llvm/test/MC/AsmParser/macro-at-pseudo-variable.s @@ -74,15 +74,15 @@ #--- b.s .rept 2 - .print "r\+" + .print "r\+ \+" .endr .irpc foo,12 - .print "\+i" + .print "\+\+i" .endr -# CHECK2: r0 -# CHECK2-NEXT: r1 -# CHECK2-NEXT: 0i -# CHECK2-NEXT: 1i +# CHECK2: r0 0 +# CHECK2-NEXT: r1 1 +# CHECK2-NEXT: 00i +# CHECK2-NEXT: 11i .rept 2 .rept 2 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10-wave32.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10-wave32.txt index 7d15f041bd770..78ca1bbdacf29 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10-wave32.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10-wave32.txt @@ -91,20 +91,20 @@ # FIXME: Results in invalid v_subrev_u16_dpp which apparently has the same encoding but does not exist in GFX10 -# gfx1032: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -# gfx1064: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +# COM: GFX1032: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +# COM: GFX1064: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 
bank_mask:0x0 # 0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00 # FIXME: Results in v_mul_lo_u16_dpp -# gfx1032: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -# gfx1064: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +# COM: GFX1032: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +# COM: GFX1064: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 # 0xfa,0x04,0x0a,0x52,0x01,0xe4,0x00,0x00 # FIXME: gives v_lshlrev_b16_dpp -# gfx1032: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -# gfx1064: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +# COM: GFX1032: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +# COM: GFX1064: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 # 0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00 # GFX1032: v_add_co_u32 v0, s0, v0, v2 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_ds.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_ds.txt index 36c58d4c67326..473ede00603a7 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_ds.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_ds.txt @@ -1674,7 +1674,7 @@ # GFX12: ds_pk_add_f16 v0, v0 offset:4660 ; encoding: [0x34,0x12,0x68,0xda,0x00,0x00,0x00,0x00] 0x34,0x12,0x68,0xda,0x00,0x00,0x00,0x00 -# gfx12: ds_pk_add_bf16 v2, v1 ; encoding: [0x00,0x00,0x6c,0xda,0x02,0x01,0x00,0x00] +# GFX12: ds_pk_add_bf16 v2, v1 ; encoding: [0x00,0x00,0x6c,0xda,0x02,0x01,0x00,0x00] 0x00,0x00,0x6c,0xda,0x02,0x01,0x00,0x00 # GFX12: ds_pk_add_f16 v0, v0 offset:4660 ; encoding: [0x34,0x12,0x68,0xda,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/WebAssembly/basic-assembly.s b/llvm/test/MC/WebAssembly/basic-assembly.s index 769cd7edfa8a3..ac358c1b5c7a5 100644 --- 
a/llvm/test/MC/WebAssembly/basic-assembly.s +++ b/llvm/test/MC/WebAssembly/basic-assembly.s @@ -146,12 +146,14 @@ test0: .ident "clang version 9.0.0 (trunk 364502) (llvm/trunk 364571)" -.tabletype empty_eref_table, externref -empty_eref_table: +.tabletype empty_externref_table, externref +empty_externref_table: -.tabletype empty_fref_table, funcref -empty_fref_table: +.tabletype empty_funcref_table, funcref +empty_funcref_table: +.tabletype empty_exnref_table, exnref +empty_exnref_table: # CHECK: .text # CHECK: .globaltype __stack_pointer, i32 @@ -283,8 +285,11 @@ empty_fref_table: # CHECK-NEXT: .p2align 2 # CHECK-NEXT: .int32 test0 -# CHECK: .tabletype empty_eref_table, externref -# CHECK-NEXT: empty_eref_table: +# CHECK: .tabletype empty_externref_table, externref +# CHECK-NEXT: empty_externref_table: -# CHECK: .tabletype empty_fref_table, funcref -# CHECK-NEXT: empty_fref_table: +# CHECK: .tabletype empty_funcref_table, funcref +# CHECK-NEXT: empty_funcref_table: + +# CHECK: .tabletype empty_exnref_table, exnref +# CHECK-NEXT: empty_exnref_table: diff --git a/llvm/test/MC/WebAssembly/reference-types.s b/llvm/test/MC/WebAssembly/reference-types.s index ab3e3ee6b155b..2f8bfba68dcea 100644 --- a/llvm/test/MC/WebAssembly/reference-types.s +++ b/llvm/test/MC/WebAssembly/reference-types.s @@ -4,22 +4,27 @@ # CHECK-LABEL:ref_is_null: # CHECK: ref.is_null # encoding: [0xd1] ref_is_null: - .functype ref_is_null () -> (i32, i32) + .functype ref_is_null () -> (i32, i32, i32) ref.null_extern ref.is_null ref.null_func ref.is_null + ref.null_exn + ref.is_null end_function # CHECK-LABEL: ref_null_test: # CHECK: ref.null_func # encoding: [0xd0,0x70] # CHECK: ref.null_extern # encoding: [0xd0,0x6f] +# CHECK: ref.null_exn # encoding: [0xd0,0x69] ref_null_test: .functype ref_null_test () -> () ref.null_func drop ref.null_extern drop + ref.null_exn + drop end_function # CHECK-LABEL: ref_sig_test_funcref: @@ -36,9 +41,17 @@ ref_sig_test_externref: local.get 0 end_function +# 
CHECK-LABEL: ref_sig_test_exnref: +# CHECK-NEXT: .functype ref_sig_test_exnref (exnref) -> (exnref) +ref_sig_test_exnref: + .functype ref_sig_test_exnref (exnref) -> (exnref) + local.get 0 + end_function + # CHECK-LABEL: ref_select_test: # CHECK: funcref.select # encoding: [0x1b] # CHECK: externref.select # encoding: [0x1b] +# CHECK: exnref.select # encoding: [0x1b] ref_select_test: .functype ref_select_test () -> () ref.null_func @@ -51,15 +64,24 @@ ref_select_test: i32.const 0 externref.select drop + ref.null_exn + ref.null_exn + i32.const 0 + exnref.select + drop end_function # CHECK-LABEL: ref_block_test: # CHECK: block funcref # CHECK: block externref +# CHECK: block exnref ref_block_test: - .functype ref_block_test () -> (externref, funcref) + .functype ref_block_test () -> (exnref, externref, funcref) block funcref block externref + block exnref + ref.null_exn + end_block ref.null_extern end_block ref.null_func diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index d397188a9882e..113a23da776fa 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -851,4 +851,28 @@ main: # CHECK: f16x8.extract_lane 1 # encoding: [0xfd,0xa1,0x02,0x01] f16x8.extract_lane 1 + # CHECK: f16x8.add # encoding: [0xfd,0xb4,0x02] + f16x8.add + + # CHECK: f16x8.sub # encoding: [0xfd,0xb5,0x02] + f16x8.sub + + # CHECK: f16x8.mul # encoding: [0xfd,0xb6,0x02] + f16x8.mul + + # CHECK: f16x8.div # encoding: [0xfd,0xb7,0x02] + f16x8.div + + # CHECK: f16x8.min # encoding: [0xfd,0xb8,0x02] + f16x8.min + + # CHECK: f16x8.max # encoding: [0xfd,0xb9,0x02] + f16x8.max + + # CHECK: f16x8.pmin # encoding: [0xfd,0xba,0x02] + f16x8.pmin + + # CHECK: f16x8.pmax # encoding: [0xfd,0xbb,0x02] + f16x8.pmax + end_function diff --git a/llvm/test/MC/WebAssembly/type-checker-errors.s b/llvm/test/MC/WebAssembly/type-checker-errors.s index 5e28d117501e9..d2841250137a8 100644 --- 
a/llvm/test/MC/WebAssembly/type-checker-errors.s +++ b/llvm/test/MC/WebAssembly/type-checker-errors.s @@ -215,6 +215,22 @@ table_fill_type_mismatch_3: table.fill valid_table end_function +table_fill_type_mismatch_4: + .functype table_fill_type_mismatch_4 () -> () + ref.null_exn + i32.const 1 +# CHECK: [[@LINE+1]]:3: error: popped exnref, expected externref + table.fill valid_table + end_function + +table_fill_type_mismatch_5: + .functype table_fill_type_mismatch_5 () -> () + ref.null_exn + i32.const 1 +# CHECK: [[@LINE+1]]:3: error: popped exnref, expected externref + table.fill valid_table + end_function + table_grow_non_exist_table: .functype table_grow_non_exist_table (externref, i32) -> (i32) local.get 0 diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll index 72109d0cff437..4290e4f705887 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll @@ -1,34 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s ; Ports of most of test/CodeGen/NVPTX/access-non-generic.ll @scalar = internal addrspace(3) global float 0.0, align 4 @array = internal addrspace(3) global [10 x float] zeroinitializer, align 4 -; CHECK-LABEL: @load_store_lds_f32( -; CHECK: %tmp = load float, ptr addrspace(3) @scalar, align 4 -; CHECK: call void @use(float %tmp) -; CHECK: store float %v, ptr addrspace(3) @scalar, align 4 -; CHECK: call void @llvm.amdgcn.s.barrier() -; CHECK: %tmp2 = load float, ptr addrspace(3) @scalar, align 4 -; CHECK: call void @use(float %tmp2) -; CHECK: store float %v, ptr addrspace(3) @scalar, align 4 -; CHECK: call void @llvm.amdgcn.s.barrier() -; CHECK: %tmp3 = load float, ptr addrspace(3) getelementptr inbounds ([10 x float], 
ptr addrspace(3) @array, i32 0, i32 5), align 4 -; CHECK: call void @use(float %tmp3) -; CHECK: store float %v, ptr addrspace(3) getelementptr inbounds ([10 x float], ptr addrspace(3) @array, i32 0, i32 5), align 4 -; CHECK: call void @llvm.amdgcn.s.barrier() -; CHECK: %tmp4 = getelementptr inbounds [10 x float], ptr addrspace(3) @array, i32 0, i32 5 -; CHECK: %tmp5 = load float, ptr addrspace(3) %tmp4, align 4 -; CHECK: call void @use(float %tmp5) -; CHECK: store float %v, ptr addrspace(3) %tmp4, align 4 -; CHECK: call void @llvm.amdgcn.s.barrier() -; CHECK: %tmp7 = getelementptr inbounds [10 x float], ptr addrspace(3) @array, i32 0, i32 %i -; CHECK: %tmp8 = load float, ptr addrspace(3) %tmp7, align 4 -; CHECK: call void @use(float %tmp8) -; CHECK: store float %v, ptr addrspace(3) %tmp7, align 4 -; CHECK: call void @llvm.amdgcn.s.barrier() -; CHECK: ret void define amdgpu_kernel void @load_store_lds_f32(i32 %i, float %v) #0 { +; CHECK-LABEL: define amdgpu_kernel void @load_store_lds_f32( +; CHECK-SAME: i32 [[I:%.*]], float [[V:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[TMP:%.*]] = load float, ptr addrspace(3) @scalar, align 4 +; CHECK-NEXT: call void @use(float [[TMP]]) +; CHECK-NEXT: store float [[V]], ptr addrspace(3) @scalar, align 4 +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(3) @scalar, align 4 +; CHECK-NEXT: call void @use(float [[TMP2]]) +; CHECK-NEXT: store float [[V]], ptr addrspace(3) @scalar, align 4 +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(3) getelementptr inbounds ([10 x float], ptr addrspace(3) @array, i32 0, i32 5), align 4 +; CHECK-NEXT: call void @use(float [[TMP3]]) +; CHECK-NEXT: store float [[V]], ptr addrspace(3) getelementptr inbounds ([10 x float], ptr addrspace(3) @array, i32 0, i32 5), align 4 +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr 
inbounds [10 x float], ptr addrspace(3) @array, i32 0, i32 5 +; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(3) [[TMP4]], align 4 +; CHECK-NEXT: call void @use(float [[TMP5]]) +; CHECK-NEXT: store float [[V]], ptr addrspace(3) [[TMP4]], align 4 +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [10 x float], ptr addrspace(3) @array, i32 0, i32 [[I]] +; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr addrspace(3) [[TMP7]], align 4 +; CHECK-NEXT: call void @use(float [[TMP8]]) +; CHECK-NEXT: store float [[V]], ptr addrspace(3) [[TMP7]], align 4 +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: ret void +; bb: %tmp = load float, ptr addrspacecast (ptr addrspace(3) @scalar to ptr), align 4 call void @use(float %tmp) @@ -57,20 +61,27 @@ bb: ret void } -; CHECK-LABEL: @constexpr_load_int_from_float_lds( -; CHECK: %tmp = load i32, ptr addrspace(3) @scalar, align 4 define i32 @constexpr_load_int_from_float_lds() #0 { +; CHECK-LABEL: define i32 @constexpr_load_int_from_float_lds( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(3) @scalar, align 4 +; CHECK-NEXT: ret i32 [[TMP]] +; bb: %tmp = load i32, ptr addrspacecast (ptr addrspace(3) @scalar to ptr), align 4 ret i32 %tmp } -; CHECK-LABEL: @load_int_from_global_float( -; CHECK: %tmp1 = getelementptr float, ptr addrspace(1) %input, i32 %i -; CHECK: %tmp2 = getelementptr float, ptr addrspace(1) %tmp1, i32 %j -; CHECK: %tmp4 = load i32, ptr addrspace(1) %tmp2 -; CHECK: ret i32 %tmp4 define i32 @load_int_from_global_float(ptr addrspace(1) %input, i32 %i, i32 %j) #0 { +; CHECK-LABEL: define i32 @load_int_from_global_float( +; CHECK-SAME: ptr addrspace(1) [[INPUT:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr addrspace(1) [[INPUT]], i32 [[I]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr addrspace(1) 
[[TMP1]], i32 [[J]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: ret i32 [[TMP4]] +; bb: %tmp = addrspacecast ptr addrspace(1) %input to ptr %tmp1 = getelementptr float, ptr %tmp, i32 %i @@ -79,20 +90,26 @@ bb: ret i32 %tmp4 } -; CHECK-LABEL: @nested_const_expr( -; CHECK: store i32 1, ptr addrspace(3) getelementptr inbounds ([10 x float], ptr addrspace(3) @array, i64 0, i64 1), align 4 define amdgpu_kernel void @nested_const_expr() #0 { +; CHECK-LABEL: define amdgpu_kernel void @nested_const_expr( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: store i32 1, ptr addrspace(3) getelementptr inbounds ([10 x float], ptr addrspace(3) @array, i64 0, i64 1), align 4 +; CHECK-NEXT: ret void +; store i32 1, ptr bitcast (ptr getelementptr ([10 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i64 0, i64 1) to ptr), align 4 ret void } -; CHECK-LABEL: @rauw( -; CHECK: %addr = getelementptr float, ptr addrspace(1) %input, i64 10 -; CHECK-NEXT: %v = load float, ptr addrspace(1) %addr -; CHECK-NEXT: store float %v, ptr addrspace(1) %addr -; CHECK-NEXT: ret void define amdgpu_kernel void @rauw(ptr addrspace(1) %input) #0 { +; CHECK-LABEL: define amdgpu_kernel void @rauw( +; CHECK-SAME: ptr addrspace(1) [[INPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr float, ptr addrspace(1) [[INPUT]], i64 10 +; CHECK-NEXT: [[V:%.*]] = load float, ptr addrspace(1) [[ADDR]], align 4 +; CHECK-NEXT: store float [[V]], ptr addrspace(1) [[ADDR]], align 4 +; CHECK-NEXT: ret void +; bb: %generic_input = addrspacecast ptr addrspace(1) %input to ptr %addr = getelementptr float, ptr %generic_input, i64 10 @@ -102,20 +119,22 @@ bb: } ; FIXME: Should be able to eliminate the cast inside the loop -; CHECK-LABEL: @loop( - -; CHECK: %end = getelementptr float, ptr addrspace(3) @array, i64 10 -; CHECK: br label %loop - -; CHECK: loop: ; preds = %loop, %entry -; CHECK: %i = phi ptr addrspace(3) [ @array, %entry ], 
[ %i2, %loop ] -; CHECK: %v = load float, ptr addrspace(3) %i -; CHECK: call void @use(float %v) -; CHECK: %i2 = getelementptr float, ptr addrspace(3) %i, i64 1 -; CHECK: %exit_cond = icmp eq ptr addrspace(3) %i2, %end - -; CHECK: br i1 %exit_cond, label %exit, label %loop define amdgpu_kernel void @loop() #0 { +; CHECK-LABEL: define amdgpu_kernel void @loop( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[END:%.*]] = getelementptr float, ptr addrspace(3) @array, i64 10 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[I:%.*]] = phi ptr addrspace(3) [ @array, %[[ENTRY]] ], [ [[I2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[V:%.*]] = load float, ptr addrspace(3) [[I]], align 4 +; CHECK-NEXT: call void @use(float [[V]]) +; CHECK-NEXT: [[I2]] = getelementptr float, ptr addrspace(3) [[I]], i64 1 +; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq ptr addrspace(3) [[I2]], [[END]] +; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; entry: %p = addrspacecast ptr addrspace(3) @array to ptr %end = getelementptr float, ptr %p, i64 10 @@ -135,19 +154,23 @@ exit: ; preds = %loop @generic_end = external addrspace(1) global ptr -; CHECK-LABEL: @loop_with_generic_bound( -; CHECK: %end = load ptr, ptr addrspace(1) @generic_end -; CHECK: br label %loop - -; CHECK: loop: -; CHECK: %i = phi ptr addrspace(3) [ @array, %entry ], [ %i2, %loop ] -; CHECK: %v = load float, ptr addrspace(3) %i -; CHECK: call void @use(float %v) -; CHECK: %i2 = getelementptr float, ptr addrspace(3) %i, i64 1 -; CHECK: %0 = addrspacecast ptr addrspace(3) %i2 to ptr -; CHECK: %exit_cond = icmp eq ptr %0, %end -; CHECK: br i1 %exit_cond, label %exit, label %loop define amdgpu_kernel void @loop_with_generic_bound() #0 { +; CHECK-LABEL: define amdgpu_kernel void @loop_with_generic_bound( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[END:%.*]] = load ptr, ptr addrspace(1) 
@generic_end, align 8 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[I:%.*]] = phi ptr addrspace(3) [ @array, %[[ENTRY]] ], [ [[I2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[V:%.*]] = load float, ptr addrspace(3) [[I]], align 4 +; CHECK-NEXT: call void @use(float [[V]]) +; CHECK-NEXT: [[I2]] = getelementptr float, ptr addrspace(3) [[I]], i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(3) [[I2]] to ptr +; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq ptr [[TMP0]], [[END]] +; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; entry: %p = addrspacecast ptr addrspace(3) @array to ptr %end = load ptr, ptr addrspace(1) @generic_end @@ -165,11 +188,14 @@ exit: ; preds = %loop ret void } -; CHECK-LABEL: @select_bug( -; CHECK: %sel = select i1 icmp ne (ptr inttoptr (i64 4873 to ptr), ptr null), i64 73, i64 93 -; CHECK: %add.ptr157 = getelementptr inbounds i64, ptr undef, i64 %sel -; CHECK: %cmp169 = icmp uge ptr undef, %add.ptr157 define void @select_bug() #0 { +; CHECK-LABEL: define void @select_bug( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[SEL:%.*]] = select i1 icmp ne (ptr inttoptr (i64 4873 to ptr), ptr null), i64 73, i64 93 +; CHECK-NEXT: [[ADD_PTR157:%.*]] = getelementptr inbounds i64, ptr undef, i64 [[SEL]] +; CHECK-NEXT: [[CMP169:%.*]] = icmp uge ptr undef, [[ADD_PTR157]] +; CHECK-NEXT: unreachable +; %sel = select i1 icmp ne (ptr inttoptr (i64 4873 to ptr), ptr null), i64 73, i64 93 %add.ptr157 = getelementptr inbounds i64, ptr undef, i64 %sel %cmp169 = icmp uge ptr undef, %add.ptr157 diff --git a/llvm/test/Transforms/InferAddressSpaces/NVPTX/bug31948.ll b/llvm/test/Transforms/InferAddressSpaces/NVPTX/bug31948.ll index e6b517a73fa46..23c5f99e5d086 100644 --- a/llvm/test/Transforms/InferAddressSpaces/NVPTX/bug31948.ll +++ b/llvm/test/Transforms/InferAddressSpaces/NVPTX/bug31948.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -S -mtriple=nvptx64-nvidia-cuda -passes=infer-address-spaces %s | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" @@ -6,18 +7,23 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" @var1 = local_unnamed_addr addrspace(3) externally_initialized global %struct.bar undef, align 8 -; CHECK-LABEL: @bug31948( -; CHECK: %tmp = load ptr, ptr addrspace(3) getelementptr inbounds (%struct.bar, ptr addrspace(3) @var1, i64 0, i32 1), align 8 -; CHECK: %tmp1 = load float, ptr %tmp, align 4 -; CHECK: store float %conv1, ptr %tmp, align 4 -; CHECK: store i32 32, ptr addrspace(3) getelementptr inbounds (%struct.bar, ptr addrspace(3) @var1, i64 0, i32 1), align 4 define void @bug31948(float %a, ptr nocapture readnone %x, ptr nocapture readnone %y) local_unnamed_addr #0 { +; CHECK-LABEL: define void @bug31948( +; CHECK-SAME: float [[A:%.*]], ptr nocapture readnone [[X:%.*]], ptr nocapture readnone [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP:%.*]] = load ptr, ptr addrspace(3) getelementptr inbounds ([[STRUCT_BAR:%.*]], ptr addrspace(3) @var1, i64 0, i32 1), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[TMP]], align 4 +; CHECK-NEXT: [[CONV1:%.*]] = fadd float [[TMP1]], 1.000000e+00 +; CHECK-NEXT: store float [[CONV1]], ptr [[TMP]], align 4 +; CHECK-NEXT: store i32 32, ptr addrspace(3) getelementptr inbounds ([[STRUCT_BAR]], ptr addrspace(3) @var1, i64 0, i32 1), align 4 +; CHECK-NEXT: ret void +; entry: %tmp = load ptr, ptr getelementptr (%struct.bar, ptr addrspacecast (ptr addrspace(3) @var1 to ptr), i64 0, i32 1), align 8 %tmp1 = load float, ptr %tmp, align 4 %conv1 = fadd float %tmp1, 1.000000e+00 store float %conv1, ptr %tmp, align 4 - store i32 32, ptr bitcast (ptr getelementptr (%struct.bar, ptr addrspacecast (ptr addrspace(3) @var1 to ptr), i64 0, i32 1) to ptr), align 4 + store i32 32, ptr getelementptr (%struct.bar, ptr 
addrspacecast (ptr addrspace(3) @var1 to ptr), i64 0, i32 1), align 4 ret void } diff --git a/llvm/test/Transforms/Inline/ret_attr_align_and_noundef.ll b/llvm/test/Transforms/Inline/ret_attr_align_and_noundef.ll index c038ffccf3e96..f4cebf1fcb5da 100644 --- a/llvm/test/Transforms/Inline/ret_attr_align_and_noundef.ll +++ b/llvm/test/Transforms/Inline/ret_attr_align_and_noundef.ll @@ -5,10 +5,12 @@ declare ptr @foo() declare void @use.ptr(ptr) willreturn nounwind +declare void @use.val(i8) willreturn nounwind declare void @bar() declare void @baz() declare ptr @llvm.ptrmask.p0.i64(ptr, i64) declare i1 @val() +declare i8 @val8() define ptr @callee0123() { ; CHECK-LABEL: define ptr @callee0123() { @@ -337,3 +339,74 @@ define ptr @caller12_todo() { %r = call nonnull ptr @callee12() ret ptr %r } + +define i8 @callee13() { +; CHECK-LABEL: define i8 @callee13() { +; CHECK-NEXT: [[R:%.*]] = call i8 @val8() +; CHECK-NEXT: ret i8 [[R]] +; + %r = call i8 @val8() + ret i8 %r +} + +define i8 @caller13_okay_use_after_poison_anyways() { +; CHECK-LABEL: define i8 @caller13_okay_use_after_poison_anyways() { +; CHECK-NEXT: [[R_I:%.*]] = call range(i8 0, 10) i8 @val8() +; CHECK-NEXT: call void @use.val(i8 [[R_I]]) +; CHECK-NEXT: ret i8 [[R_I]] +; + %r = call range(i8 0, 10) i8 @callee13() + call void @use.val(i8 %r) + ret i8 %r +} + +define i8 @callee14() { +; CHECK-LABEL: define i8 @callee14() { +; CHECK-NEXT: [[R:%.*]] = call noundef i8 @val8() +; CHECK-NEXT: ret i8 [[R]] +; + %r = call noundef i8 @val8() + ret i8 %r +} + +define i8 @caller14_fail_creates_ub() { +; CHECK-LABEL: define i8 @caller14_fail_creates_ub() { +; CHECK-NEXT: [[R_I:%.*]] = call noundef i8 @val8() +; CHECK-NEXT: call void @use.val(i8 [[R_I]]) +; CHECK-NEXT: ret i8 [[R_I]] +; + %r = call range(i8 0, 10) i8 @callee14() + call void @use.val(i8 %r) + ret i8 %r +} + +define i8 @caller14_okay_is_ub_anyways() { +; CHECK-LABEL: define i8 @caller14_okay_is_ub_anyways() { +; CHECK-NEXT: [[R_I:%.*]] = call noundef 
range(i8 0, 10) i8 @val8() +; CHECK-NEXT: call void @use.val(i8 [[R_I]]) +; CHECK-NEXT: ret i8 [[R_I]] +; + %r = call noundef range(i8 0, 10) i8 @callee14() + call void @use.val(i8 %r) + ret i8 %r +} + +define i8 @callee15() { +; CHECK-LABEL: define i8 @callee15() { +; CHECK-NEXT: [[R:%.*]] = call range(i8 5, 10) i8 @val8() +; CHECK-NEXT: ret i8 [[R]] +; + %r = call range(i8 5, 10) i8 @val8() + ret i8 %r +} + +define i8 @caller15_okay_intersect_ranges() { +; CHECK-LABEL: define i8 @caller15_okay_intersect_ranges() { +; CHECK-NEXT: [[R_I:%.*]] = call range(i8 5, 7) i8 @val8() +; CHECK-NEXT: call void @use.val(i8 [[R_I]]) +; CHECK-NEXT: ret i8 [[R_I]] +; + %r = call range(i8 0, 7) i8 @callee15() + call void @use.val(i8 %r) + ret i8 %r +} diff --git a/llvm/test/Transforms/InstCombine/ashr-lshr.ll b/llvm/test/Transforms/InstCombine/ashr-lshr.ll index ac206dc7999dd..c2a4f35412670 100644 --- a/llvm/test/Transforms/InstCombine/ashr-lshr.ll +++ b/llvm/test/Transforms/InstCombine/ashr-lshr.ll @@ -604,3 +604,262 @@ define <2 x i8> @ashr_known_pos_exact_vec(<2 x i8> %x, <2 x i8> %y) { %r = ashr exact <2 x i8> %p, %y ret <2 x i8> %r } + +define i32 @lshr_mul_times_3_div_2(i32 %0) { +; CHECK-LABEL: @lshr_mul_times_3_div_2( +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 1 +; CHECK-NEXT: [[LSHR:%.*]] = add nuw nsw i32 [[TMP2]], [[TMP0]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %mul = mul nsw nuw i32 %0, 3 + %lshr = lshr i32 %mul, 1 + ret i32 %lshr +} + +define i32 @lshr_mul_times_3_div_2_exact(i32 %x) { +; CHECK-LABEL: @lshr_mul_times_3_div_2_exact( +; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 1 +; CHECK-NEXT: [[LSHR:%.*]] = add nsw i32 [[TMP1]], [[X]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %mul = mul nsw i32 %x, 3 + %lshr = lshr exact i32 %mul, 1 + ret i32 %lshr +} + +; Negative test + +define i32 @lshr_mul_times_3_div_2_no_flags(i32 %0) { +; CHECK-LABEL: @lshr_mul_times_3_div_2_no_flags( +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP0:%.*]], 3 +; CHECK-NEXT: [[LSHR:%.*]] 
= lshr i32 [[MUL]], 1 +; CHECK-NEXT: ret i32 [[LSHR]] +; + %mul = mul i32 %0, 3 + %lshr = lshr i32 %mul, 1 + ret i32 %lshr +} + +; Negative test + +define i32 @mul_times_3_div_2_multiuse_lshr(i32 %x) { +; CHECK-LABEL: @mul_times_3_div_2_multiuse_lshr( +; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[X:%.*]], 3 +; CHECK-NEXT: [[RES:%.*]] = lshr i32 [[MUL]], 1 +; CHECK-NEXT: call void @use(i32 [[MUL]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %mul = mul nuw i32 %x, 3 + %res = lshr i32 %mul, 1 + call void @use(i32 %mul) + ret i32 %res +} + +define i32 @lshr_mul_times_3_div_2_exact_2(i32 %x) { +; CHECK-LABEL: @lshr_mul_times_3_div_2_exact_2( +; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 1 +; CHECK-NEXT: [[LSHR:%.*]] = add nuw i32 [[TMP1]], [[X]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %mul = mul nuw i32 %x, 3 + %lshr = lshr exact i32 %mul, 1 + ret i32 %lshr +} + +define i32 @lshr_mul_times_5_div_4(i32 %0) { +; CHECK-LABEL: @lshr_mul_times_5_div_4( +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 2 +; CHECK-NEXT: [[LSHR:%.*]] = add nuw nsw i32 [[TMP2]], [[TMP0]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %mul = mul nsw nuw i32 %0, 5 + %lshr = lshr i32 %mul, 2 + ret i32 %lshr +} + +define i32 @lshr_mul_times_5_div_4_exact(i32 %x) { +; CHECK-LABEL: @lshr_mul_times_5_div_4_exact( +; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 2 +; CHECK-NEXT: [[LSHR:%.*]] = add nsw i32 [[TMP1]], [[X]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %mul = mul nsw i32 %x, 5 + %lshr = lshr exact i32 %mul, 2 + ret i32 %lshr +} + +; Negative test + +define i32 @lshr_mul_times_5_div_4_no_flags(i32 %0) { +; CHECK-LABEL: @lshr_mul_times_5_div_4_no_flags( +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP0:%.*]], 5 +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[MUL]], 2 +; CHECK-NEXT: ret i32 [[LSHR]] +; + %mul = mul i32 %0, 5 + %lshr = lshr i32 %mul, 2 + ret i32 %lshr +} + +; Negative test + +define i32 @mul_times_5_div_4_multiuse_lshr(i32 %x) { +; CHECK-LABEL: @mul_times_5_div_4_multiuse_lshr( +; CHECK-NEXT: 
[[MUL:%.*]] = mul nuw i32 [[X:%.*]], 5 +; CHECK-NEXT: [[RES:%.*]] = lshr i32 [[MUL]], 2 +; CHECK-NEXT: call void @use(i32 [[MUL]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %mul = mul nuw i32 %x, 5 + %res = lshr i32 %mul, 2 + call void @use(i32 %mul) + ret i32 %res +} + +define i32 @lshr_mul_times_5_div_4_exact_2(i32 %x) { +; CHECK-LABEL: @lshr_mul_times_5_div_4_exact_2( +; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 2 +; CHECK-NEXT: [[LSHR:%.*]] = add nuw i32 [[TMP1]], [[X]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %mul = mul nuw i32 %x, 5 + %lshr = lshr exact i32 %mul, 2 + ret i32 %lshr +} + +define i32 @ashr_mul_times_3_div_2(i32 %0) { +; CHECK-LABEL: @ashr_mul_times_3_div_2( +; CHECK-NEXT: [[TMP2:%.*]] = ashr i32 [[TMP0:%.*]], 1 +; CHECK-NEXT: [[ASHR:%.*]] = add nuw nsw i32 [[TMP2]], [[TMP0]] +; CHECK-NEXT: ret i32 [[ASHR]] +; + %mul = mul nuw nsw i32 %0, 3 + %ashr = ashr i32 %mul, 1 + ret i32 %ashr +} + +define i32 @ashr_mul_times_3_div_2_exact(i32 %x) { +; CHECK-LABEL: @ashr_mul_times_3_div_2_exact( +; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i32 [[X:%.*]], 1 +; CHECK-NEXT: [[ASHR:%.*]] = add nsw i32 [[TMP1]], [[X]] +; CHECK-NEXT: ret i32 [[ASHR]] +; + %mul = mul nsw i32 %x, 3 + %ashr = ashr exact i32 %mul, 1 + ret i32 %ashr +} + +; Negative test + +define i32 @ashr_mul_times_3_div_2_no_flags(i32 %0) { +; CHECK-LABEL: @ashr_mul_times_3_div_2_no_flags( +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP0:%.*]], 3 +; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[MUL]], 1 +; CHECK-NEXT: ret i32 [[ASHR]] +; + %mul = mul i32 %0, 3 + %ashr = ashr i32 %mul, 1 + ret i32 %ashr +} + +; Negative test + +define i32 @ashr_mul_times_3_div_2_no_nsw(i32 %0) { +; CHECK-LABEL: @ashr_mul_times_3_div_2_no_nsw( +; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[TMP0:%.*]], 3 +; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[MUL]], 1 +; CHECK-NEXT: ret i32 [[ASHR]] +; + %mul = mul nuw i32 %0, 3 + %ashr = ashr i32 %mul, 1 + ret i32 %ashr +} + +; Negative test + +define i32 @mul_times_3_div_2_multiuse_ashr(i32 %x) { +; 
CHECK-LABEL: @mul_times_3_div_2_multiuse_ashr( +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X:%.*]], 3 +; CHECK-NEXT: [[RES:%.*]] = ashr i32 [[MUL]], 1 +; CHECK-NEXT: call void @use(i32 [[MUL]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %mul = mul nsw i32 %x, 3 + %res = ashr i32 %mul, 1 + call void @use(i32 %mul) + ret i32 %res +} + +define i32 @ashr_mul_times_3_div_2_exact_2(i32 %x) { +; CHECK-LABEL: @ashr_mul_times_3_div_2_exact_2( +; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i32 [[X:%.*]], 1 +; CHECK-NEXT: [[ASHR:%.*]] = add nsw i32 [[TMP1]], [[X]] +; CHECK-NEXT: ret i32 [[ASHR]] +; + %mul = mul nsw i32 %x, 3 + %ashr = ashr exact i32 %mul, 1 + ret i32 %ashr +} + +define i32 @ashr_mul_times_5_div_4(i32 %0) { +; CHECK-LABEL: @ashr_mul_times_5_div_4( +; CHECK-NEXT: [[TMP2:%.*]] = ashr i32 [[TMP0:%.*]], 2 +; CHECK-NEXT: [[ASHR:%.*]] = add nuw nsw i32 [[TMP2]], [[TMP0]] +; CHECK-NEXT: ret i32 [[ASHR]] +; + %mul = mul nuw nsw i32 %0, 5 + %ashr = ashr i32 %mul, 2 + ret i32 %ashr +} + +define i32 @ashr_mul_times_5_div_4_exact(i32 %x) { +; CHECK-LABEL: @ashr_mul_times_5_div_4_exact( +; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i32 [[X:%.*]], 2 +; CHECK-NEXT: [[ASHR:%.*]] = add nsw i32 [[TMP1]], [[X]] +; CHECK-NEXT: ret i32 [[ASHR]] +; + %mul = mul nsw i32 %x, 5 + %ashr = ashr exact i32 %mul, 2 + ret i32 %ashr +} + +; Negative test + +define i32 @ashr_mul_times_5_div_4_no_flags(i32 %0) { +; CHECK-LABEL: @ashr_mul_times_5_div_4_no_flags( +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP0:%.*]], 5 +; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[MUL]], 2 +; CHECK-NEXT: ret i32 [[ASHR]] +; + %mul = mul i32 %0, 5 + %ashr = ashr i32 %mul, 2 + ret i32 %ashr +} + +; Negative test + +define i32 @mul_times_5_div_4_multiuse_ashr(i32 %x) { +; CHECK-LABEL: @mul_times_5_div_4_multiuse_ashr( +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X:%.*]], 5 +; CHECK-NEXT: [[RES:%.*]] = ashr i32 [[MUL]], 2 +; CHECK-NEXT: call void @use(i32 [[MUL]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %mul = mul nsw i32 %x, 5 + %res = ashr i32 
%mul, 2 + call void @use(i32 %mul) + ret i32 %res +} + +define i32 @ashr_mul_times_5_div_4_exact_2(i32 %x) { +; CHECK-LABEL: @ashr_mul_times_5_div_4_exact_2( +; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i32 [[X:%.*]], 2 +; CHECK-NEXT: [[ASHR:%.*]] = add nsw i32 [[TMP1]], [[X]] +; CHECK-NEXT: ret i32 [[ASHR]] +; + %mul = mul nsw i32 %x, 5 + %ashr = ashr exact i32 %mul, 2 + ret i32 %ashr +} + +declare void @use(i32) diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll index 88487b38e2c70..0a7de501ca022 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll @@ -144,7 +144,7 @@ define i1 @oneuse0(i8 %x, i8 %y) { ; CHECK-LABEL: @oneuse0( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X:%.*]], [[Y]] +; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and i8 [[T0]], [[X:%.*]] ; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0 ; CHECK-NEXT: ret i1 [[RET]] ; @@ -161,7 +161,8 @@ define i1 @oneuse1(i8 %x, i8 %y) { ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1 ; CHECK-NEXT: call void @use8(i8 [[T1]]) -; CHECK-NEXT: [[RET:%.*]] = icmp uge i8 [[T1]], [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[T0]], [[X:%.*]] +; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[RET]] ; %t0 = shl i8 -1, %y @@ -195,7 +196,8 @@ define i1 @oneuse3(i8 %x, i8 %y) { ; CHECK-NEXT: call void @use8(i8 [[T0]]) ; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1 ; CHECK-NEXT: call void @use8(i8 [[T1]]) -; CHECK-NEXT: [[RET:%.*]] = icmp uge i8 [[T1]], [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[T0]], [[X:%.*]] +; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[TMP1]], 0 ; CHECK-NEXT: ret 
i1 [[RET]] ; %t0 = shl i8 -1, %y @@ -269,9 +271,8 @@ define i1 @n0(i8 %x, i8 %y, i8 %notx) { define i1 @n1(i8 %x, i8 %y) { ; CHECK-LABEL: @n1( ; CHECK-NEXT: [[T0:%.*]] = shl nuw i8 1, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1 -; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]] -; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[T0]], [[X:%.*]] +; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[RET]] ; %t0 = shl i8 1, %y ; not -1 @@ -284,9 +285,9 @@ define i1 @n1(i8 %x, i8 %y) { define i1 @n2(i8 %x, i8 %y) { ; CHECK-LABEL: @n2( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], 1 +; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -2 ; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]] -; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], [[X]] +; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], 0 ; CHECK-NEXT: ret i1 [[RET]] ; %t0 = shl i8 -1, %y diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll index b717925fd644f..54ff87676e71d 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll @@ -144,7 +144,7 @@ define i1 @oneuse0(i8 %x, i8 %y) { ; CHECK-LABEL: @oneuse0( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X:%.*]], [[Y]] +; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and i8 [[T0]], [[X:%.*]] ; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0 ; CHECK-NEXT: ret i1 [[RET]] ; @@ -161,7 +161,8 @@ define i1 @oneuse1(i8 %x, i8 %y) { ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1 ; CHECK-NEXT: call void @use8(i8 [[T1]]) -; CHECK-NEXT: [[RET:%.*]] = icmp 
ult i8 [[T1]], [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[T0]], [[X:%.*]] +; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[RET]] ; %t0 = shl i8 -1, %y @@ -195,7 +196,8 @@ define i1 @oneuse3(i8 %x, i8 %y) { ; CHECK-NEXT: call void @use8(i8 [[T0]]) ; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1 ; CHECK-NEXT: call void @use8(i8 [[T1]]) -; CHECK-NEXT: [[RET:%.*]] = icmp ult i8 [[T1]], [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[T0]], [[X:%.*]] +; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[RET]] ; %t0 = shl i8 -1, %y @@ -269,9 +271,8 @@ define i1 @n0(i8 %x, i8 %y, i8 %notx) { define i1 @n1(i8 %x, i8 %y) { ; CHECK-LABEL: @n1( ; CHECK-NEXT: [[T0:%.*]] = shl nuw i8 1, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1 -; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]] -; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[T0]], [[X:%.*]] +; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[RET]] ; %t0 = shl i8 1, %y ; not -1 @@ -284,9 +285,9 @@ define i1 @n1(i8 %x, i8 %y) { define i1 @n2(i8 %x, i8 %y) { ; CHECK-LABEL: @n2( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], 1 +; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -2 ; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]] -; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], [[X]] +; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], 0 ; CHECK-NEXT: ret i1 [[RET]] ; %t0 = shl i8 -1, %y diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll index a65be1e9ceeca..c7c57b601eab3 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll @@ -251,9 +251,9 @@ define i1 @n1(i8 %x, i8 %y) { ; 
CHECK-LABEL: @n1( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1 +; CHECK-NEXT: [[T1:%.*]] = sub i8 0, [[T0]] ; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]] -; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], [[X]] +; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], 0 ; CHECK-NEXT: ret i1 [[RET]] ; %t0 = shl i8 -1, %y ; not 1 @@ -268,9 +268,9 @@ define i1 @n2(i8 %x, i8 %y) { ; CHECK-LABEL: @n2( ; CHECK-NEXT: [[T0:%.*]] = shl nuw i8 1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add nuw i8 [[T0]], 1 +; CHECK-NEXT: [[T1:%.*]] = sub nuw i8 -2, [[T0]] ; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]] -; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], [[X]] +; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], 0 ; CHECK-NEXT: ret i1 [[RET]] ; %t0 = shl i8 1, %y diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll index f156d9bf007cb..d5826524f1637 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll @@ -251,9 +251,9 @@ define i1 @n1(i8 %x, i8 %y) { ; CHECK-LABEL: @n1( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1 +; CHECK-NEXT: [[T1:%.*]] = sub i8 0, [[T0]] ; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]] -; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], [[X]] +; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], 0 ; CHECK-NEXT: ret i1 [[RET]] ; %t0 = shl i8 -1, %y ; not 1 @@ -268,9 +268,9 @@ define i1 @n2(i8 %x, i8 %y) { ; CHECK-LABEL: @n2( ; CHECK-NEXT: [[T0:%.*]] = shl nuw i8 1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add nuw i8 [[T0]], 1 +; 
CHECK-NEXT: [[T1:%.*]] = sub nuw i8 -2, [[T0]] ; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]] -; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], [[X]] +; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], 0 ; CHECK-NEXT: ret i1 [[RET]] ; %t0 = shl i8 1, %y diff --git a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll index 5de3e89d7027a..8bb7fd0e522cb 100644 --- a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll +++ b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll @@ -5,9 +5,9 @@ declare void @use.i8(i8) declare void @use.i16(i16) define i1 @src_is_mask_zext(i16 %x_in, i8 %y) { ; CHECK-LABEL: @src_is_mask_zext( -; CHECK-NEXT: [[X:%.*]] = xor i16 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[M_IN:%.*]] = lshr i8 -1, [[Y:%.*]] ; CHECK-NEXT: [[MASK:%.*]] = zext i8 [[M_IN]] to i16 +; CHECK-NEXT: [[X:%.*]] = xor i16 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[R:%.*]] = icmp ule i16 [[X]], [[MASK]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -22,11 +22,11 @@ define i1 @src_is_mask_zext(i16 %x_in, i8 %y) { define i1 @src_is_mask_zext_fail_not_mask(i16 %x_in, i8 %y) { ; CHECK-LABEL: @src_is_mask_zext_fail_not_mask( -; CHECK-NEXT: [[X:%.*]] = xor i16 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[M_IN:%.*]] = lshr i8 -2, [[Y:%.*]] ; CHECK-NEXT: [[MASK:%.*]] = zext i8 [[M_IN]] to i16 -; CHECK-NEXT: [[AND:%.*]] = and i16 [[X]], [[MASK]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[AND]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i16 [[X_IN:%.*]], -124 +; CHECK-NEXT: [[TMP2:%.*]] = or i16 [[TMP1]], [[MASK]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[TMP2]], -1 ; CHECK-NEXT: ret i1 [[R]] ; %x = xor i16 %x_in, 123 @@ -80,10 +80,10 @@ define i1 @src_is_mask_sext_fail_multiuse(i16 %x_in, i8 %y) { define i1 @src_is_mask_and(i8 %x_in, i8 %y, i8 %z) { ; CHECK-LABEL: @src_is_mask_and( -; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[MY:%.*]] = lshr i8 7, [[Y:%.*]] ; CHECK-NEXT: [[MZ:%.*]] = lshr i8 -1, [[Z:%.*]] ; CHECK-NEXT: [[MASK:%.*]] = 
and i8 [[MY]], [[MZ]] +; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -99,12 +99,12 @@ define i1 @src_is_mask_and(i8 %x_in, i8 %y, i8 %z) { define i1 @src_is_mask_and_fail_mixed(i8 %x_in, i8 %y, i8 %z) { ; CHECK-LABEL: @src_is_mask_and_fail_mixed( -; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[MY:%.*]] = ashr i8 -8, [[Y:%.*]] ; CHECK-NEXT: [[MZ:%.*]] = lshr i8 -1, [[Z:%.*]] ; CHECK-NEXT: [[MASK:%.*]] = and i8 [[MY]], [[MZ]] -; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], [[MASK]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[X]], [[AND]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124 +; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[MASK]], [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP2]], -1 ; CHECK-NEXT: ret i1 [[R]] ; %x = xor i8 %x_in, 123 @@ -119,9 +119,9 @@ define i1 @src_is_mask_and_fail_mixed(i8 %x_in, i8 %y, i8 %z) { define i1 @src_is_mask_or(i8 %x_in, i8 %y) { ; CHECK-LABEL: @src_is_mask_or( -; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[MY:%.*]] = lshr i8 -1, [[Y:%.*]] ; CHECK-NEXT: [[MASK:%.*]] = and i8 [[MY]], 7 +; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -136,9 +136,9 @@ define i1 @src_is_mask_or(i8 %x_in, i8 %y) { define i1 @src_is_mask_xor(i8 %x_in, i8 %y) { ; CHECK-LABEL: @src_is_mask_xor( -; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1 ; CHECK-NEXT: [[MASK:%.*]] = xor i8 [[Y_M1]], [[Y]] +; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -152,11 +152,11 @@ define i1 @src_is_mask_xor(i8 %x_in, i8 %y) { define i1 @src_is_mask_xor_fail_notmask(i8 %x_in, i8 %y) { ; CHECK-LABEL: @src_is_mask_xor_fail_notmask( -; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, 
[[Y:%.*]] ; CHECK-NEXT: [[NOTMASK:%.*]] = xor i8 [[TMP1]], [[Y]] -; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], [[NOTMASK]] -; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[AND]], [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[X_IN:%.*]], -124 +; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[NOTMASK]], [[TMP2]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[TMP3]], -1 ; CHECK-NEXT: ret i1 [[R]] ; %x = xor i8 %x_in, 123 @@ -170,10 +170,10 @@ define i1 @src_is_mask_xor_fail_notmask(i8 %x_in, i8 %y) { define i1 @src_is_mask_select(i8 %x_in, i8 %y, i1 %cond) { ; CHECK-LABEL: @src_is_mask_select( -; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1 ; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]] ; CHECK-NEXT: [[MASK:%.*]] = select i1 [[COND:%.*]], i8 [[YMASK]], i8 15 +; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -245,11 +245,11 @@ define i1 @src_is_mask_shl_lshr_fail_not_allones(i8 %x_in, i8 %y, i1 %cond) { define i1 @src_is_mask_lshr(i8 %x_in, i8 %y, i8 %z, i1 %cond) { ; CHECK-LABEL: @src_is_mask_lshr( -; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1 ; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]] ; CHECK-NEXT: [[SMASK:%.*]] = select i1 [[COND:%.*]], i8 [[YMASK]], i8 15 ; CHECK-NEXT: [[MASK:%.*]] = lshr i8 [[SMASK]], [[Z:%.*]] +; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -265,11 +265,11 @@ define i1 @src_is_mask_lshr(i8 %x_in, i8 %y, i8 %z, i1 %cond) { define i1 @src_is_mask_ashr(i8 %x_in, i8 %y, i8 %z, i1 %cond) { ; CHECK-LABEL: @src_is_mask_ashr( -; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1 ; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]] ; CHECK-NEXT: [[SMASK:%.*]] = select i1 [[COND:%.*]], i8 [[YMASK]], i8 15 ; CHECK-NEXT: [[MASK:%.*]] = ashr i8 
[[SMASK]], [[Z:%.*]] +; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -285,9 +285,9 @@ define i1 @src_is_mask_ashr(i8 %x_in, i8 %y, i8 %z, i1 %cond) { define i1 @src_is_mask_p2_m1(i8 %x_in, i8 %y) { ; CHECK-LABEL: @src_is_mask_p2_m1( -; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[P2ORZ:%.*]] = shl i8 2, [[Y:%.*]] ; CHECK-NEXT: [[MASK:%.*]] = add i8 [[P2ORZ]], -1 +; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -301,10 +301,10 @@ define i1 @src_is_mask_p2_m1(i8 %x_in, i8 %y) { define i1 @src_is_mask_umax(i8 %x_in, i8 %y) { ; CHECK-LABEL: @src_is_mask_umax( -; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1 ; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]] ; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.umax.i8(i8 [[YMASK]], i8 3) +; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -320,11 +320,11 @@ define i1 @src_is_mask_umax(i8 %x_in, i8 %y) { define i1 @src_is_mask_umin(i8 %x_in, i8 %y, i8 %z) { ; CHECK-LABEL: @src_is_mask_umin( -; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1 ; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]] ; CHECK-NEXT: [[ZMASK:%.*]] = lshr i8 15, [[Z:%.*]] ; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.umin.i8(i8 [[YMASK]], i8 [[ZMASK]]) +; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -341,12 +341,12 @@ define i1 @src_is_mask_umin(i8 %x_in, i8 %y, i8 %z) { define i1 @src_is_mask_umin_fail_mismatch(i8 %x_in, i8 %y) { ; CHECK-LABEL: @src_is_mask_umin_fail_mismatch( -; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1 ; 
CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]] ; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.umin.i8(i8 [[YMASK]], i8 -32) -; CHECK-NEXT: [[AND:%.*]] = and i8 [[MASK]], [[X]] -; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[AND]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124 +; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[MASK]], [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[TMP2]], -1 ; CHECK-NEXT: ret i1 [[R]] ; %x = xor i8 %x_in, 123 @@ -361,10 +361,10 @@ define i1 @src_is_mask_umin_fail_mismatch(i8 %x_in, i8 %y) { define i1 @src_is_mask_smax(i8 %x_in, i8 %y) { ; CHECK-LABEL: @src_is_mask_smax( -; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1 ; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]] ; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.smax.i8(i8 [[YMASK]], i8 -1) +; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -380,10 +380,10 @@ define i1 @src_is_mask_smax(i8 %x_in, i8 %y) { define i1 @src_is_mask_smin(i8 %x_in, i8 %y) { ; CHECK-LABEL: @src_is_mask_smin( -; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1 ; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]] ; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.smin.i8(i8 [[YMASK]], i8 0) +; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -399,9 +399,9 @@ define i1 @src_is_mask_smin(i8 %x_in, i8 %y) { define i1 @src_is_mask_bitreverse_not_mask(i8 %x_in, i8 %y) { ; CHECK-LABEL: @src_is_mask_bitreverse_not_mask( -; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[NMASK:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[NMASK]]) +; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]] ; CHECK-NEXT: ret i1 [[R]] ; @@ -417,7 +417,7 @@ define 
i1 @src_is_mask_bitreverse_not_mask(i8 %x_in, i8 %y) { define i1 @src_is_notmask_sext(i16 %x_in, i8 %y) { ; CHECK-LABEL: @src_is_notmask_sext( ; CHECK-NEXT: [[M_IN:%.*]] = shl i8 -8, [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor i16 [[X_IN:%.*]], -124 +; CHECK-NEXT: [[TMP1:%.*]] = xor i16 [[X_IN:%.*]], -128 ; CHECK-NEXT: [[TMP2:%.*]] = sext i8 [[M_IN]] to i16 ; CHECK-NEXT: [[R:%.*]] = icmp uge i16 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i1 [[R]] @@ -529,12 +529,11 @@ define i1 @src_is_notmask_lshr_shl(i8 %x_in, i8 %y) { define i1 @src_is_notmask_lshr_shl_fail_mismatch_shifts(i8 %x_in, i8 %y, i8 %z) { ; CHECK-LABEL: @src_is_notmask_lshr_shl_fail_mismatch_shifts( -; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123 ; CHECK-NEXT: [[MASK_SHR:%.*]] = lshr i8 -1, [[Y:%.*]] ; CHECK-NEXT: [[NMASK:%.*]] = shl i8 [[MASK_SHR]], [[Z:%.*]] -; CHECK-NEXT: [[MASK:%.*]] = xor i8 [[NMASK]], -1 -; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], [[MASK]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AND]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], 123 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[NMASK]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP2]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %x = xor i8 %x_in, 123 diff --git a/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll index e95c72b75f97d..0f26be12c39cc 100644 --- a/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll +++ b/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll @@ -3,6 +3,7 @@ declare i1 @barrier() declare void @llvm.assume(i1) +declare void @use.i8(i8) define i1 @icmp_ult_x_y(i8 %x, i8 %y) { ; CHECK-LABEL: @icmp_ult_x_y( @@ -238,9 +239,9 @@ define i1 @icmp_sle_negx_y_fail_maybe_zero(i8 %x, i8 %y) { define i1 @icmp_eq_x_invertable_y_todo(i8 %x, i1 %y) { ; CHECK-LABEL: @icmp_eq_x_invertable_y_todo( -; CHECK-NEXT: [[YY:%.*]] = select i1 [[Y:%.*]], i8 7, i8 24 +; CHECK-NEXT: [[YY:%.*]] = select i1 [[Y:%.*]], i8 -8, i8 -25 ; CHECK-NEXT: [[AND:%.*]] = and i8 [[YY]], [[X:%.*]] -; 
CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AND]], [[X]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AND]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %yy = select i1 %y, i8 7, i8 24 @@ -251,22 +252,36 @@ define i1 @icmp_eq_x_invertable_y_todo(i8 %x, i1 %y) { define i1 @icmp_eq_x_invertable_y(i8 %x, i8 %y) { ; CHECK-LABEL: @icmp_eq_x_invertable_y( +; CHECK-NEXT: [[AND:%.*]] = and i8 [[YY:%.*]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AND]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %yy = xor i8 %y, -1 + %and = and i8 %x, %yy + %r = icmp eq i8 %x, %and + ret i1 %r +} + +define i1 @icmp_eq_x_invertable_y_fail_multiuse(i8 %x, i8 %y) { +; CHECK-LABEL: @icmp_eq_x_invertable_y_fail_multiuse( ; CHECK-NEXT: [[YY:%.*]] = xor i8 [[Y:%.*]], -1 ; CHECK-NEXT: [[AND:%.*]] = and i8 [[YY]], [[X:%.*]] +; CHECK-NEXT: call void @use.i8(i8 [[AND]]) ; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AND]], [[X]] ; CHECK-NEXT: ret i1 [[R]] ; %yy = xor i8 %y, -1 %and = and i8 %x, %yy + call void @use.i8(i8 %and) %r = icmp eq i8 %x, %and ret i1 %r } define i1 @icmp_eq_x_invertable_y2_todo(i8 %x, i1 %y) { ; CHECK-LABEL: @icmp_eq_x_invertable_y2_todo( -; CHECK-NEXT: [[YY:%.*]] = select i1 [[Y:%.*]], i8 7, i8 24 -; CHECK-NEXT: [[AND:%.*]] = and i8 [[YY]], [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[YY]], [[AND]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[Y:%.*]], i8 -8, i8 -25 +; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP2]], -1 ; CHECK-NEXT: ret i1 [[R]] ; %yy = select i1 %y, i8 7, i8 24 @@ -277,9 +292,8 @@ define i1 @icmp_eq_x_invertable_y2_todo(i8 %x, i1 %y) { define i1 @icmp_eq_x_invertable_y2(i8 %x, i8 %y) { ; CHECK-LABEL: @icmp_eq_x_invertable_y2( -; CHECK-NEXT: [[YY:%.*]] = xor i8 [[Y:%.*]], -1 -; CHECK-NEXT: [[AND:%.*]] = and i8 [[YY]], [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AND]], [[YY]] +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP1]], -1 ; CHECK-NEXT: ret i1 [[R]] ; %yy = xor i8 %y, -1 
diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll index fa92c1c4b3be4..dfdb6c7b4b268 100644 --- a/llvm/test/Transforms/InstCombine/lshr.ll +++ b/llvm/test/Transforms/InstCombine/lshr.ll @@ -628,12 +628,12 @@ define i32 @mul_splat_fold_wrong_lshr_const(i32 %x) { ret i32 %t } -; Negative test +; Negative test (but simplifies into a different transform) define i32 @mul_splat_fold_no_nuw(i32 %x) { ; CHECK-LABEL: @mul_splat_fold_no_nuw( -; CHECK-NEXT: [[M:%.*]] = mul nsw i32 [[X:%.*]], 65537 -; CHECK-NEXT: [[T:%.*]] = lshr i32 [[M]], 16 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 16 +; CHECK-NEXT: [[T:%.*]] = add nsw i32 [[TMP1]], [[X]] ; CHECK-NEXT: ret i32 [[T]] ; %m = mul nsw i32 %x, 65537 @@ -641,6 +641,19 @@ define i32 @mul_splat_fold_no_nuw(i32 %x) { ret i32 %t } +; Negative test + +define i32 @mul_splat_fold_no_flags(i32 %x) { +; CHECK-LABEL: @mul_splat_fold_no_flags( +; CHECK-NEXT: [[M:%.*]] = mul i32 [[X:%.*]], 65537 +; CHECK-NEXT: [[T:%.*]] = lshr i32 [[M]], 16 +; CHECK-NEXT: ret i32 [[T]] +; + %m = mul i32 %x, 65537 + %t = lshr i32 %m, 16 + ret i32 %t +} + ; Negative test (but simplifies before we reach the mul_splat transform)- need more than 2 bits define i2 @mul_splat_fold_too_narrow(i2 %x) { diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/logf128.ll b/llvm/test/Transforms/InstSimplify/ConstProp/logf128.ll new file mode 100644 index 0000000000000..da56997f69382 --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/ConstProp/logf128.ll @@ -0,0 +1,126 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=instsimplify -S | FileCheck %s + +; REQUIRES: has_logf128 +declare fp128 @llvm.log.f128(fp128) + +define fp128 @log_e_64(){ +; CHECK-LABEL: define fp128 @log_e_64() { +; CHECK-NEXT: ret fp128 0xL300000000000000040010A2B23F3BAB7 +; + %A = call fp128 @llvm.log.f128(fp128 noundef 0xL00000000000000004005000000000000) + ret fp128 %A 
+} + +define fp128 @log_e_smallest_positive_subnormal_number(){ +; CHECK-LABEL: define fp128 @log_e_smallest_positive_subnormal_number() { +; CHECK-NEXT: ret fp128 0xL3000000000000000C00C654628220780 +; + %A = call fp128 @llvm.log.f128(fp128 noundef 0xL00000000000000010000000000000000) + ret fp128 %A +} + +define fp128 @log_e_largest_subnormal_number(){ +; CHECK-LABEL: define fp128 @log_e_largest_subnormal_number() { +; CHECK-NEXT: ret fp128 0xLD000000000000000C00C62D918CE2421 +; + %A = call fp128 @llvm.log.f128(fp128 noundef 0xLFFFFFFFFFFFFFFFF0000FFFFFFFFFFFF) + ret fp128 %A +} + +define fp128 @log_e_smallest_positive_normal_number(){ +; +; CHECK-LABEL: define fp128 @log_e_smallest_positive_normal_number() { +; CHECK-NEXT: ret fp128 0xLD000000000000000C00C62D918CE2421 +; + %A = call fp128 @llvm.log.f128(fp128 noundef 0xL00000000000000000001000000000000) + ret fp128 %A +} + +define fp128 @log_e_largest_normal_number(){ +; CHECK-LABEL: define fp128 @log_e_largest_normal_number() { +; CHECK-NEXT: ret fp128 0xLF000000000000000400C62E42FEFA39E +; + %A = call fp128 @llvm.log.f128(fp128 noundef 0xLFFFFFFFFFFFFFFFF7FFEFFFFFFFFFFFF) + ret fp128 %A +} + +define fp128 @log_e_largest_number_less_than_one(){ +; CHECK-LABEL: define fp128 @log_e_largest_number_less_than_one() { +; CHECK-NEXT: ret fp128 0xL0000000000000000BF8E000000000000 +; + %A = call fp128 @llvm.log.f128(fp128 noundef 0xLFFFFFFFFFFFFFFFF3FFEFFFFFFFFFFFF) + ret fp128 %A +} + +define fp128 @log_e_1(){ +; CHECK-LABEL: define fp128 @log_e_1() { +; CHECK-NEXT: ret fp128 0xL00000000000000000000000000000000 +; + %A = call fp128 @llvm.log.f128(fp128 noundef 0xL00000000000000003FFF000000000000) + ret fp128 %A +} + +define fp128 @log_e_smallest_number_larger_than_one(){ +; CHECK-LABEL: define fp128 @log_e_smallest_number_larger_than_one() { +; CHECK-NEXT: ret fp128 0xL00000000000000003F8F000000000000 +; + %A = call fp128 @llvm.log.f128(fp128 noundef 0xL00000000000000013FFF000000000000) + ret fp128 %A +} + +define fp128 
@log_e_negative_2(){ +; CHECK-LABEL: define fp128 @log_e_negative_2() { +; CHECK-NEXT: ret fp128 0xL00000000000000007FFF800000000000 +; + %A = call fp128 @llvm.log.f128(fp128 noundef 0xL0000000000000000C000000000000000) + ret fp128 %A +} + +define fp128 @log_e_0(){ +; CHECK-LABEL: define fp128 @log_e_0() { +; CHECK-NEXT: ret fp128 0xL0000000000000000FFFF000000000000 +; + %A = call fp128 @llvm.log.f128(fp128 noundef 0xL00000000000000000000000000000000) + ret fp128 %A +} + +define fp128 @log_e_negative_0(){ +; CHECK-LABEL: define fp128 @log_e_negative_0() { +; CHECK-NEXT: ret fp128 0xL0000000000000000FFFF000000000000 +; + %A = call fp128 @llvm.log.f128(fp128 noundef 0xL00000000000000008000000000000000) + ret fp128 %A +} + +define fp128 @log_e_infinity(){ +; CHECK-LABEL: define fp128 @log_e_infinity() { +; CHECK-NEXT: ret fp128 0xL00000000000000007FFF000000000000 +; + %A = call fp128 @llvm.log.f128(fp128 noundef 0xL00000000000000007FFF000000000000) + ret fp128 %A +} + +define fp128 @log_e_negative_infinity(){ +; CHECK-LABEL: define fp128 @log_e_negative_infinity() { +; CHECK-NEXT: ret fp128 0xL00000000000000007FFF800000000000 +; + %A = call fp128 @llvm.log.f128(fp128 noundef 0xL0000000000000000FFFF000000000000) + ret fp128 %A +} + +define fp128 @log_e_nan(){ +; CHECK-LABEL: define fp128 @log_e_nan() { +; CHECK-NEXT: ret fp128 0xL00000000000000007FFF800000000001 +; + %A = call fp128 @llvm.log.f128(fp128 noundef 0xL00000000000000007FFF000000000001) + ret fp128 %A +} + +define <2 x fp128> @log_e_negative_2_vector(){ +; CHECK-LABEL: define <2 x fp128> @log_e_negative_2_vector() { +; CHECK-NEXT: ret <2 x fp128> +; + %A = call <2 x fp128> @llvm.log.v2f128(<2 x fp128> ) + ret <2 x fp128> %A +} diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vectorgep-crash.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vectorgep-crash.ll index 5f55450120620..00ee7f8a92b21 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vectorgep-crash.ll +++ 
b/llvm/test/Transforms/InstSimplify/ConstProp/vectorgep-crash.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -passes=instcombine -S -o - %s | FileCheck %s ; Tests that we don't crash upon encountering a vector GEP @@ -23,17 +24,21 @@ top: %struct.C = type { i64 } @G = internal global [65 x %struct.A] zeroinitializer, align 16 -; CHECK-LABEL: @test -; CHECK: ret <16 x ptr> getelementptr ([65 x %struct.A], ptr @G, <16 x i64> zeroinitializer, <16 x i64> , i32 0) define <16 x ptr> @test() { +; CHECK-LABEL: define <16 x ptr> @test() { +; CHECK-NEXT: [[VECTOR_BODY:.*:]] +; CHECK-NEXT: ret <16 x ptr> getelementptr ([65 x %struct.A], ptr @G, <16 x i64> zeroinitializer, <16 x i64> , i32 0) +; vector.body: %VectorGep = getelementptr [65 x %struct.A], ptr @G, <16 x i64> zeroinitializer, <16 x i64> , <16 x i32> zeroinitializer ret <16 x ptr> %VectorGep } -; CHECK-LABEL: @test2 -; CHECK: ret <16 x ptr> getelementptr ([65 x %struct.A], ptr @G, <16 x i64> zeroinitializer, <16 x i64> @test2() { +; CHECK-LABEL: define <16 x ptr> @test2() { +; CHECK-NEXT: [[VECTOR_BODY:.*:]] +; CHECK-NEXT: ret <16 x ptr> getelementptr ([65 x %struct.A], ptr @G, <16 x i64> zeroinitializer, <16 x i64> , i32 0) +; vector.body: %VectorGep = getelementptr [65 x %struct.A], ptr @G, <16 x i32> zeroinitializer, <16 x i64> , <16 x i32> zeroinitializer ret <16 x ptr> %VectorGep @@ -42,7 +47,7 @@ vector.body: @g = external global i8, align 1 define <2 x ptr> @constant_zero_index() { -; CHECK-LABEL: @constant_zero_index( +; CHECK-LABEL: define <2 x ptr> @constant_zero_index() { ; CHECK-NEXT: ret <2 x ptr> ; %gep = getelementptr i8, ptr @g, <2 x i64> zeroinitializer @@ -50,7 +55,7 @@ define <2 x ptr> @constant_zero_index() { } define <2 x ptr> @constant_undef_index() { -; CHECK-LABEL: @constant_undef_index( +; CHECK-LABEL: define <2 x ptr> @constant_undef_index() { ; CHECK-NEXT: ret <2 x ptr> ; %gep = getelementptr i8, ptr @g, <2 x 
i64> undef @@ -58,7 +63,7 @@ define <2 x ptr> @constant_undef_index() { } define <2 x ptr> @constant_inbounds() { -; CHECK-LABEL: @constant_inbounds( +; CHECK-LABEL: define <2 x ptr> @constant_inbounds() { ; CHECK-NEXT: ret <2 x ptr> getelementptr inbounds (i8, ptr @g, <2 x i64> ) ; %gep = getelementptr i8, ptr @g, <2 x i64> diff --git a/llvm/test/Transforms/InstSimplify/vector_gep.ll b/llvm/test/Transforms/InstSimplify/vector_gep.ll index ba0d978ed5b3c..79aa9f13d1ea7 100644 --- a/llvm/test/Transforms/InstSimplify/vector_gep.ll +++ b/llvm/test/Transforms/InstSimplify/vector_gep.ll @@ -1,105 +1,120 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -S -passes=instsimplify < %s | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" declare void @helper(<2 x ptr>) define void @test(<2 x ptr> %a) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: <2 x ptr> [[A:%.*]]) { +; CHECK-NEXT: call void @helper(<2 x ptr> [[A]]) +; CHECK-NEXT: ret void +; %A = getelementptr i8, <2 x ptr> %a, <2 x i32> call void @helper(<2 x ptr> %A) ret void } define <4 x ptr> @test1(<4 x ptr> %a) { +; CHECK-LABEL: define <4 x ptr> @test1( +; CHECK-SAME: <4 x ptr> [[A:%.*]]) { +; CHECK-NEXT: ret <4 x ptr> [[A]] +; %gep = getelementptr i8, <4 x ptr> %a, <4 x i32> zeroinitializer ret <4 x ptr> %gep - -; CHECK-LABEL: @test1 -; CHECK-NEXT: ret <4 x ptr> %a } define <4 x ptr> @test2(<4 x ptr> %a) { +; CHECK-LABEL: define <4 x ptr> @test2( +; CHECK-SAME: <4 x ptr> [[A:%.*]]) { +; CHECK-NEXT: ret <4 x ptr> [[A]] +; %gep = getelementptr i8, <4 x ptr> %a ret <4 x ptr> %gep - -; CHECK-LABEL: @test2 -; CHECK-NEXT: ret <4 x ptr> %a } %struct = type { double, float } define <4 x ptr> @test3() { +; CHECK-LABEL: define <4 x ptr> @test3() { +; CHECK-NEXT: ret <4 x ptr> undef +; %gep = getelementptr %struct, <4 x ptr> undef, <4 x i32> , <4 x i32> ret <4 x ptr> %gep - -; CHECK-LABEL: @test3 -; CHECK-NEXT: ret <4 x ptr> undef } 
%struct.empty = type { } define <4 x ptr> @test4(<4 x ptr> %a) { +; CHECK-LABEL: define <4 x ptr> @test4( +; CHECK-SAME: <4 x ptr> [[A:%.*]]) { +; CHECK-NEXT: ret <4 x ptr> [[A]] +; %gep = getelementptr %struct.empty, <4 x ptr> %a, <4 x i32> ret <4 x ptr> %gep - -; CHECK-LABEL: @test4 -; CHECK-NEXT: ret <4 x ptr> %a } define <4 x ptr> @test5() { +; CHECK-LABEL: define <4 x ptr> @test5() { +; CHECK-NEXT: ret <4 x ptr> getelementptr (i8, <4 x ptr> , <4 x i64> ) +; %c = inttoptr <4 x i64> to <4 x ptr> %gep = getelementptr i8, <4 x ptr> %c, <4 x i32> ret <4 x ptr> %gep - -; CHECK-LABEL: @test5 -; CHECK-NEXT: ret <4 x ptr> getelementptr (i8, <4 x ptr> , <4 x i64> ) } @v = global [24 x [42 x [3 x i32]]] zeroinitializer, align 16 define <16 x ptr> @test6() { -; CHECK-LABEL: @test6 -; CHECK-NEXT: ret <16 x ptr> getelementptr inbounds ([24 x [42 x [3 x i32]]], ptr @v, <16 x i64> zeroinitializer, <16 x i64> zeroinitializer, <16 x i64> , <16 x i64> zeroinitializer) +; CHECK-LABEL: define <16 x ptr> @test6() { +; CHECK-NEXT: ret <16 x ptr> getelementptr inbounds ([24 x [42 x [3 x i32]]], ptr @v, <16 x i64> zeroinitializer, <16 x i64> zeroinitializer, <16 x i64> , <16 x i64> zeroinitializer) +; %VectorGep = getelementptr [24 x [42 x [3 x i32]]], ptr @v, i64 0, i64 0, <16 x i64> , i64 0 ret <16 x ptr> %VectorGep } ; PR32697 -; CHECK-LABEL: tinkywinky( -; CHECK-NEXT: ret <4 x ptr> undef define <4 x ptr> @tinkywinky() { +; CHECK-LABEL: define <4 x ptr> @tinkywinky() { +; CHECK-NEXT: ret <4 x ptr> undef +; %patatino = getelementptr i8, ptr undef, <4 x i64> undef ret <4 x ptr> %patatino } ; PR32697 -; CHECK-LABEL: dipsy( -; CHECK-NEXT: ret <4 x ptr> undef define <4 x ptr> @dipsy() { +; CHECK-LABEL: define <4 x ptr> @dipsy() { +; CHECK-NEXT: ret <4 x ptr> undef +; %patatino = getelementptr i8, <4 x ptr> undef, <4 x i64> undef ret <4 x ptr> %patatino } ; PR32697 -; CHECK-LABEL: laalaa( -; CHECK-NEXT: ret <4 x ptr> undef define <4 x ptr> @laalaa() { +; CHECK-LABEL: define <4 x ptr> 
@laalaa() { +; CHECK-NEXT: ret <4 x ptr> undef +; %patatino = getelementptr i8, <4 x ptr> undef, i64 undef ret <4 x ptr> %patatino } define <2 x ptr> @zero_index(ptr %p) { -; CHECK-LABEL: @zero_index( -; CHECK-NEXT: %gep = getelementptr i8, ptr %p, <2 x i64> zeroinitializer -; CHECK-NEXT: ret <2 x ptr> %gep +; CHECK-LABEL: define <2 x ptr> @zero_index( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[P]], <2 x i64> zeroinitializer +; CHECK-NEXT: ret <2 x ptr> [[GEP]] ; %gep = getelementptr i8, ptr %p, <2 x i64> zeroinitializer ret <2 x ptr> %gep } define <2 x ptr> @unsized(ptr %p) { -; CHECK-LABEL: @unsized( -; CHECK-NEXT: %gep = getelementptr {}, ptr %p, <2 x i64> undef -; CHECK-NEXT: ret <2 x ptr> %gep +; CHECK-LABEL: define <2 x ptr> @unsized( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr {}, ptr [[P]], <2 x i64> undef +; CHECK-NEXT: ret <2 x ptr> [[GEP]] ; %gep = getelementptr {}, ptr %p, <2 x i64> undef ret <2 x ptr> %gep diff --git a/llvm/test/Transforms/LICM/update-scev-after-hoist.ll b/llvm/test/Transforms/LICM/update-scev-after-hoist.ll new file mode 100644 index 0000000000000..fc45b8fce1766 --- /dev/null +++ b/llvm/test/Transforms/LICM/update-scev-after-hoist.ll @@ -0,0 +1,24 @@ +; RUN: opt -S -passes='loop-unroll,loop-mssa(licm),print' -unroll-count=4 -disable-output < %s 2>&1 | FileCheck %s --check-prefix=SCEV-EXPR + +define i16 @main() { +; SCEV-EXPR: Classifying expressions for: @main +; SCEV-EXPR-NEXT: %mul = phi i16 [ 1, %entry ], [ %mul.n.3, %loop ] +; SCEV-EXPR-NEXT: --> %mul U: [0,-15) S: [-32768,32753) Exits: 4096 LoopDispositions: { %loop: Variant } +; SCEV-EXPR-NEXT: %div = phi i16 [ 32767, %entry ], [ %div.n.3, %loop ] +; SCEV-EXPR-NEXT: --> %div U: [-2048,-32768) S: [-2048,-32768) Exits: 7 LoopDispositions: { %loop: Variant } +; SCEV-EXPR-NEXT: %mul.n.reass.reass = mul i16 %mul, 8 +; SCEV-EXPR-NEXT: --> (8 * %mul) U: [0,-7) S: [-32768,32761) Exits: -32768 LoopDispositions: { 
%loop: Variant } +entry: + br label %loop + +loop: + %mul = phi i16 [ 1, %entry ], [ %mul.n, %loop ] + %div = phi i16 [ 32767, %entry ], [ %div.n, %loop ] + %mul.n = mul i16 %mul, 2 + %div.n = sdiv i16 %div, 2 + %cmp = icmp sgt i16 %div, 0 + br i1 %cmp, label %loop, label %end + +end: + ret i16 %mul +} diff --git a/llvm/test/Transforms/Reassociate/local-cse.ll b/llvm/test/Transforms/Reassociate/local-cse.ll index 4d0467e263f55..d0d609f022b46 100644 --- a/llvm/test/Transforms/Reassociate/local-cse.ll +++ b/llvm/test/Transforms/Reassociate/local-cse.ll @@ -26,16 +26,16 @@ define void @chain_spanning_several_blocks(i64 %inv1, i64 %inv2, i64 %inv3, i64 ; LOCAL_CSE-LABEL: define void @chain_spanning_several_blocks ; LOCAL_CSE-SAME: (i64 [[INV1:%.*]], i64 [[INV2:%.*]], i64 [[INV3:%.*]], i64 [[INV4:%.*]], i64 [[INV5:%.*]]) { ; LOCAL_CSE-NEXT: bb1: -; LOCAL_CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw i64 [[INV2]], [[INV1]] +; LOCAL_CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw nsw i64 [[INV2]], [[INV1]] ; LOCAL_CSE-NEXT: br label [[BB2:%.*]] ; LOCAL_CSE: bb2: ; LOCAL_CSE-NEXT: [[VAL_BB2:%.*]] = call i64 @get_val() -; LOCAL_CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV4]] -; LOCAL_CSE-NEXT: [[CHAIN_A2:%.*]] = add nuw i64 [[CHAIN_A1]], [[VAL_BB2]] -; LOCAL_CSE-NEXT: [[CHAIN_B1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV5]] -; LOCAL_CSE-NEXT: [[CHAIN_B2:%.*]] = add nuw i64 [[CHAIN_B1]], [[VAL_BB2]] -; LOCAL_CSE-NEXT: [[CHAIN_C0:%.*]] = add nuw i64 [[INV3]], [[INV1]] -; LOCAL_CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw i64 [[CHAIN_C0]], [[VAL_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV4]] +; LOCAL_CSE-NEXT: [[CHAIN_A2:%.*]] = add nuw nsw i64 [[CHAIN_A1]], [[VAL_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_B1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV5]] +; LOCAL_CSE-NEXT: [[CHAIN_B2:%.*]] = add nuw nsw i64 [[CHAIN_B1]], [[VAL_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_C0:%.*]] = add nuw nsw i64 [[INV3]], [[INV1]] +; LOCAL_CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw nsw i64 
[[CHAIN_C0]], [[VAL_BB2]] ; LOCAL_CSE-NEXT: call void @keep_alive(i64 [[CHAIN_A2]]) ; LOCAL_CSE-NEXT: call void @keep_alive(i64 [[CHAIN_B2]]) ; LOCAL_CSE-NEXT: call void @keep_alive(i64 [[CHAIN_C1]]) @@ -47,11 +47,11 @@ define void @chain_spanning_several_blocks(i64 %inv1, i64 %inv2, i64 %inv3, i64 ; CSE-NEXT: br label [[BB2:%.*]] ; CSE: bb2: ; CSE-NEXT: [[VAL_BB2:%.*]] = call i64 @get_val() -; CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw i64 [[VAL_BB2]], [[INV1]] -; CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV2]] +; CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw nsw i64 [[VAL_BB2]], [[INV1]] +; CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV2]] ; CSE-NEXT: [[CHAIN_A2:%.*]] = add nuw nsw i64 [[CHAIN_A1]], [[INV4]] ; CSE-NEXT: [[CHAIN_B2:%.*]] = add nuw nsw i64 [[CHAIN_A1]], [[INV5]] -; CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV3]] +; CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV3]] ; CSE-NEXT: call void @keep_alive(i64 [[CHAIN_A2]]) ; CSE-NEXT: call void @keep_alive(i64 [[CHAIN_B2]]) ; CSE-NEXT: call void @keep_alive(i64 [[CHAIN_C1]]) @@ -90,19 +90,19 @@ define void @chain_spanning_several_blocks_no_entry_anchor() { ; LOCAL_CSE-NEXT: br label [[BB1:%.*]] ; LOCAL_CSE: bb1: ; LOCAL_CSE-NEXT: [[INV1_BB1:%.*]] = call i64 @get_val() -; LOCAL_CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw i64 [[INV1_BB1]], [[INV2_BB0]] +; LOCAL_CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw nsw i64 [[INV1_BB1]], [[INV2_BB0]] ; LOCAL_CSE-NEXT: br label [[BB2:%.*]] ; LOCAL_CSE: bb2: ; LOCAL_CSE-NEXT: [[INV3_BB2:%.*]] = call i64 @get_val() ; LOCAL_CSE-NEXT: [[INV4_BB2:%.*]] = call i64 @get_val() ; LOCAL_CSE-NEXT: [[INV5_BB2:%.*]] = call i64 @get_val() ; LOCAL_CSE-NEXT: [[VAL_BB2:%.*]] = call i64 @get_val() -; LOCAL_CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV4_BB2]] -; LOCAL_CSE-NEXT: [[CHAIN_A2:%.*]] = add nuw i64 [[CHAIN_A1]], [[VAL_BB2]] -; LOCAL_CSE-NEXT: [[CHAIN_B1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV5_BB2]] -; LOCAL_CSE-NEXT: 
[[CHAIN_B2:%.*]] = add nuw i64 [[CHAIN_B1]], [[VAL_BB2]] -; LOCAL_CSE-NEXT: [[CHAIN_C0:%.*]] = add nuw i64 [[VAL_BB2]], [[INV1_BB1]] -; LOCAL_CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw i64 [[CHAIN_C0]], [[INV3_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV4_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_A2:%.*]] = add nuw nsw i64 [[CHAIN_A1]], [[VAL_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_B1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV5_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_B2:%.*]] = add nuw nsw i64 [[CHAIN_B1]], [[VAL_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_C0:%.*]] = add nuw nsw i64 [[VAL_BB2]], [[INV1_BB1]] +; LOCAL_CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw nsw i64 [[CHAIN_C0]], [[INV3_BB2]] ; LOCAL_CSE-NEXT: call void @keep_alive(i64 [[CHAIN_A2]]) ; LOCAL_CSE-NEXT: call void @keep_alive(i64 [[CHAIN_B2]]) ; LOCAL_CSE-NEXT: call void @keep_alive(i64 [[CHAIN_C1]]) @@ -120,11 +120,11 @@ define void @chain_spanning_several_blocks_no_entry_anchor() { ; CSE-NEXT: [[INV4_BB2:%.*]] = call i64 @get_val() ; CSE-NEXT: [[INV5_BB2:%.*]] = call i64 @get_val() ; CSE-NEXT: [[VAL_BB2:%.*]] = call i64 @get_val() -; CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw i64 [[VAL_BB2]], [[INV1_BB1]] -; CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV2_BB0]] +; CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw nsw i64 [[VAL_BB2]], [[INV1_BB1]] +; CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV2_BB0]] ; CSE-NEXT: [[CHAIN_A2:%.*]] = add nuw nsw i64 [[CHAIN_A1]], [[INV4_BB2]] ; CSE-NEXT: [[CHAIN_B2:%.*]] = add nuw nsw i64 [[CHAIN_A1]], [[INV5_BB2]] -; CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV3_BB2]] +; CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV3_BB2]] ; CSE-NEXT: call void @keep_alive(i64 [[CHAIN_A2]]) ; CSE-NEXT: call void @keep_alive(i64 [[CHAIN_B2]]) ; CSE-NEXT: call void @keep_alive(i64 [[CHAIN_C1]]) diff --git a/llvm/test/Transforms/Reassociate/reassoc-add-nsw.ll b/llvm/test/Transforms/Reassociate/reassoc-add-nsw.ll new file mode 100644 index 
0000000000000..fcebc4980e6d7 --- /dev/null +++ b/llvm/test/Transforms/Reassociate/reassoc-add-nsw.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=reassociate -S | FileCheck %s +define i32 @nsw_preserve_nonnegative(ptr %ptr0, ptr %ptr1, ptr %ptr2) { +; CHECK-LABEL: define i32 @nsw_preserve_nonnegative( +; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], ptr [[PTR2:%.*]]) { +; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[PTR0]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[PTR1]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[PTR2]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[ADD0:%.*]] = add nsw i32 [[V1]], [[V0]] +; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD0]], [[V2]] +; CHECK-NEXT: ret i32 [[ADD1]] +; + %v0 = load i32, ptr %ptr0, !range !1 + %v1 = load i32, ptr %ptr1, !range !1 + %v2 = load i32, ptr %ptr2, !range !1 + %add0 = add nsw i32 %v1, %v2 + %add1 = add nsw i32 %add0, %v0 + ret i32 %add1 +} + +define i32 @nsw_preserve_nuw_nsw(ptr %ptr0, ptr %ptr1, ptr %ptr2) { +; CHECK-LABEL: define i32 @nsw_preserve_nuw_nsw( +; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], ptr [[PTR2:%.*]]) { +; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[PTR0]], align 4 +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[PTR1]], align 4 +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[PTR2]], align 4 +; CHECK-NEXT: [[ADD0:%.*]] = add nuw nsw i32 [[V1]], [[V0]] +; CHECK-NEXT: [[ADD1:%.*]] = add nuw nsw i32 [[ADD0]], [[V2]] +; CHECK-NEXT: ret i32 [[ADD1]] +; + %v0 = load i32, ptr %ptr0 + %v1 = load i32, ptr %ptr1 + %v2 = load i32, ptr %ptr2 + %add0 = add nuw nsw i32 %v1, %v2 + %add1 = add nuw nsw i32 %add0, %v0 + ret i32 %add1 +} + +define i32 @nsw_dont_preserve_negative(ptr %ptr0, ptr %ptr1, ptr %ptr2) { +; CHECK-LABEL: define i32 @nsw_dont_preserve_negative( +; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], ptr [[PTR2:%.*]]) { +; CHECK-NEXT: [[V0:%.*]] = 
load i32, ptr [[PTR0]], align 4 +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[PTR1]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[PTR2]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[ADD0:%.*]] = add i32 [[V1]], [[V0]] +; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[ADD0]], [[V2]] +; CHECK-NEXT: ret i32 [[ADD1]] +; + %v0 = load i32, ptr %ptr0 + %v1 = load i32, ptr %ptr1, !range !1 + %v2 = load i32, ptr %ptr2, !range !1 + %add0 = add nsw i32 %v1, %v2 + %add1 = add nsw i32 %add0, %v0 + ret i32 %add1 +} + +define i32 @nsw_nopreserve_notallnsw(ptr %ptr0, ptr %ptr1, ptr %ptr2) { +; CHECK-LABEL: define i32 @nsw_nopreserve_notallnsw( +; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], ptr [[PTR2:%.*]]) { +; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[PTR0]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[PTR1]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[PTR2]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[ADD0:%.*]] = add i32 [[V1]], [[V0]] +; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[ADD0]], [[V2]] +; CHECK-NEXT: ret i32 [[ADD1]] +; + %v0 = load i32, ptr %ptr0, !range !1 + %v1 = load i32, ptr %ptr1, !range !1 + %v2 = load i32, ptr %ptr2, !range !1 + %add0 = add nsw i32 %v1, %v2 + %add1 = add i32 %add0, %v0 + ret i32 %add1 +} + +; Non-negative 32-bit integers +!1 = !{i32 0, i32 2147483648} +;. +; CHECK: [[RNG0]] = !{i32 0, i32 -2147483648} +;. 
diff --git a/llvm/test/Transforms/Reassociate/reassoc_bool_vec.ll b/llvm/test/Transforms/Reassociate/reassoc_bool_vec.ll index fcedde23ecc7f..bd0060cc5abbd 100644 --- a/llvm/test/Transforms/Reassociate/reassoc_bool_vec.ll +++ b/llvm/test/Transforms/Reassociate/reassoc_bool_vec.ll @@ -57,13 +57,12 @@ define <8 x i1> @vector2(<8 x i1> %a, <8 x i1> %b0, <8 x i1> %b1, <8 x i1> %b2, ; CHECK-NEXT: [[OR6:%.*]] = or <8 x i1> [[B6]], [[A]] ; CHECK-NEXT: [[OR7:%.*]] = or <8 x i1> [[B7]], [[A]] ; CHECK-NEXT: [[XOR0:%.*]] = xor <8 x i1> [[OR1]], [[OR0]] -; CHECK-NEXT: [[XOR1:%.*]] = xor <8 x i1> [[XOR0]], [[OR2]] -; CHECK-NEXT: [[XOR2:%.*]] = xor <8 x i1> [[XOR1]], [[OR3]] -; CHECK-NEXT: [[XOR3:%.*]] = xor <8 x i1> [[XOR2]], [[OR4]] +; CHECK-NEXT: [[XOR2:%.*]] = xor <8 x i1> [[XOR0]], [[OR2]] +; CHECK-NEXT: [[OR045:%.*]] = xor <8 x i1> [[XOR2]], [[OR3]] +; CHECK-NEXT: [[XOR3:%.*]] = xor <8 x i1> [[OR045]], [[OR4]] ; CHECK-NEXT: [[XOR4:%.*]] = xor <8 x i1> [[XOR3]], [[OR5]] ; CHECK-NEXT: [[XOR5:%.*]] = xor <8 x i1> [[XOR4]], [[OR6]] ; CHECK-NEXT: [[XOR6:%.*]] = xor <8 x i1> [[XOR5]], [[OR7]] -; CHECK-NEXT: [[OR045:%.*]] = or <8 x i1> [[XOR1]], [[XOR0]] ; CHECK-NEXT: [[OR4560:%.*]] = or <8 x i1> [[OR045]], [[XOR2]] ; CHECK-NEXT: [[OR023:%.*]] = or <8 x i1> [[OR4560]], [[XOR3]] ; CHECK-NEXT: [[OR001:%.*]] = or <8 x i1> [[OR023]], [[XOR4]] diff --git a/llvm/test/Transforms/Reassociate/repeats.ll b/llvm/test/Transforms/Reassociate/repeats.ll index c18db19fa73e3..28177f1c0ba5e 100644 --- a/llvm/test/Transforms/Reassociate/repeats.ll +++ b/llvm/test/Transforms/Reassociate/repeats.ll @@ -1,56 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt < %s -passes=reassociate -S | FileCheck %s ; Tests involving repeated operations on the same value. 
define i8 @nilpotent(i8 %x) { -; CHECK-LABEL: @nilpotent( +; CHECK-LABEL: define i8 @nilpotent( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: ret i8 0 +; %tmp = xor i8 %x, %x ret i8 %tmp -; CHECK: ret i8 0 } define i2 @idempotent(i2 %x) { -; CHECK-LABEL: @idempotent( +; CHECK-LABEL: define i2 @idempotent( +; CHECK-SAME: i2 [[X:%.*]]) { +; CHECK-NEXT: ret i2 -1 +; %tmp1 = and i2 %x, %x %tmp2 = and i2 %tmp1, %x %tmp3 = and i2 %tmp2, %x ret i2 %tmp3 -; CHECK: ret i2 %x } define i2 @add(i2 %x) { -; CHECK-LABEL: @add( +; CHECK-LABEL: define i2 @add( +; CHECK-SAME: i2 [[X:%.*]]) { +; CHECK-NEXT: ret i2 0 +; %tmp1 = add i2 %x, %x %tmp2 = add i2 %tmp1, %x %tmp3 = add i2 %tmp2, %x ret i2 %tmp3 -; CHECK: ret i2 0 } define i2 @cst_add() { -; CHECK-LABEL: @cst_add( +; CHECK-LABEL: define i2 @cst_add() { +; CHECK-NEXT: ret i2 -1 +; %tmp1 = add i2 1, 1 %tmp2 = add i2 %tmp1, 1 ret i2 %tmp2 -; CHECK: ret i2 -1 } define i8 @cst_mul() { -; CHECK-LABEL: @cst_mul( +; CHECK-LABEL: define i8 @cst_mul() { +; CHECK-NEXT: ret i8 -13 +; %tmp1 = mul i8 3, 3 %tmp2 = mul i8 %tmp1, 3 %tmp3 = mul i8 %tmp2, 3 %tmp4 = mul i8 %tmp3, 3 ret i8 %tmp4 -; CHECK: ret i8 -13 } define i3 @foo3x5(i3 %x) { ; Can be done with two multiplies. -; CHECK-LABEL: @foo3x5( -; CHECK-NEXT: mul -; CHECK-NEXT: mul -; CHECK-NEXT: ret +; CHECK-LABEL: define i3 @foo3x5( +; CHECK-SAME: i3 [[X:%.*]]) { +; CHECK-NEXT: [[TMP3:%.*]] = mul i3 [[X]], [[X]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i3 [[TMP3]], [[X]] +; CHECK-NEXT: [[TMP5:%.*]] = mul i3 [[TMP4]], [[TMP3]] +; CHECK-NEXT: ret i3 [[TMP5]] +; %tmp1 = mul i3 %x, %x %tmp2 = mul i3 %tmp1, %x %tmp3 = mul i3 %tmp2, %x @@ -58,12 +70,31 @@ define i3 @foo3x5(i3 %x) { ret i3 %tmp4 } +define i3 @foo3x5_nsw(i3 %x) { +; Can be done with two multiplies. 
+; CHECK-LABEL: define i3 @foo3x5_nsw( +; CHECK-SAME: i3 [[X:%.*]]) { +; CHECK-NEXT: [[TMP3:%.*]] = mul i3 [[X]], [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i3 [[TMP3]], [[X]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i3 [[TMP2]], [[TMP3]] +; CHECK-NEXT: ret i3 [[TMP4]] +; + %tmp1 = mul i3 %x, %x + %tmp2 = mul i3 %tmp1, %x + %tmp3 = mul i3 %tmp2, %x + %tmp4 = mul nsw i3 %tmp3, %x + ret i3 %tmp4 +} + define i3 @foo3x6(i3 %x) { ; Can be done with two multiplies. -; CHECK-LABEL: @foo3x6( -; CHECK-NEXT: mul -; CHECK-NEXT: mul -; CHECK-NEXT: ret +; CHECK-LABEL: define i3 @foo3x6( +; CHECK-SAME: i3 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = mul i3 [[X]], [[X]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i3 [[TMP1]], [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i3 [[TMP3]], [[TMP3]] +; CHECK-NEXT: ret i3 [[TMP2]] +; %tmp1 = mul i3 %x, %x %tmp2 = mul i3 %tmp1, %x %tmp3 = mul i3 %tmp2, %x @@ -74,10 +105,14 @@ define i3 @foo3x6(i3 %x) { define i3 @foo3x7(i3 %x) { ; Can be done with two multiplies. -; CHECK-LABEL: @foo3x7( -; CHECK-NEXT: mul -; CHECK-NEXT: mul -; CHECK-NEXT: ret +; CHECK-LABEL: define i3 @foo3x7( +; CHECK-SAME: i3 [[X:%.*]]) { +; CHECK-NEXT: [[TMP5:%.*]] = mul i3 [[X]], [[X]] +; CHECK-NEXT: [[TMP7:%.*]] = mul i3 [[TMP5]], [[X]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i3 [[TMP7]], [[X]] +; CHECK-NEXT: [[TMP6:%.*]] = mul i3 [[TMP3]], [[TMP7]] +; CHECK-NEXT: ret i3 [[TMP6]] +; %tmp1 = mul i3 %x, %x %tmp2 = mul i3 %tmp1, %x %tmp3 = mul i3 %tmp2, %x @@ -89,10 +124,13 @@ define i3 @foo3x7(i3 %x) { define i4 @foo4x8(i4 %x) { ; Can be done with two multiplies. 
-; CHECK-LABEL: @foo4x8( -; CHECK-NEXT: mul -; CHECK-NEXT: mul -; CHECK-NEXT: ret +; CHECK-LABEL: define i4 @foo4x8( +; CHECK-SAME: i4 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = mul i4 [[X]], [[X]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i4 [[TMP1]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i4 [[TMP3]], [[TMP3]] +; CHECK-NEXT: ret i4 [[TMP4]] +; %tmp1 = mul i4 %x, %x %tmp2 = mul i4 %tmp1, %x %tmp3 = mul i4 %tmp2, %x @@ -105,11 +143,14 @@ define i4 @foo4x8(i4 %x) { define i4 @foo4x9(i4 %x) { ; Can be done with three multiplies. -; CHECK-LABEL: @foo4x9( -; CHECK-NEXT: mul -; CHECK-NEXT: mul -; CHECK-NEXT: mul -; CHECK-NEXT: ret +; CHECK-LABEL: define i4 @foo4x9( +; CHECK-SAME: i4 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = mul i4 [[X]], [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i4 [[TMP1]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i4 [[TMP2]], [[X]] +; CHECK-NEXT: [[TMP8:%.*]] = mul i4 [[TMP3]], [[TMP2]] +; CHECK-NEXT: ret i4 [[TMP8]] +; %tmp1 = mul i4 %x, %x %tmp2 = mul i4 %tmp1, %x %tmp3 = mul i4 %tmp2, %x @@ -123,11 +164,14 @@ define i4 @foo4x9(i4 %x) { define i4 @foo4x10(i4 %x) { ; Can be done with three multiplies. -; CHECK-LABEL: @foo4x10( -; CHECK-NEXT: mul -; CHECK-NEXT: mul -; CHECK-NEXT: mul -; CHECK-NEXT: ret +; CHECK-LABEL: define i4 @foo4x10( +; CHECK-SAME: i4 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = mul i4 [[X]], [[X]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i4 [[TMP1]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i4 [[TMP4]], [[X]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i4 [[TMP2]], [[TMP2]] +; CHECK-NEXT: ret i4 [[TMP3]] +; %tmp1 = mul i4 %x, %x %tmp2 = mul i4 %tmp1, %x %tmp3 = mul i4 %tmp2, %x @@ -142,12 +186,15 @@ define i4 @foo4x10(i4 %x) { define i4 @foo4x11(i4 %x) { ; Can be done with four multiplies. 
-; CHECK-LABEL: @foo4x11( -; CHECK-NEXT: mul -; CHECK-NEXT: mul -; CHECK-NEXT: mul -; CHECK-NEXT: mul -; CHECK-NEXT: ret +; CHECK-LABEL: define i4 @foo4x11( +; CHECK-SAME: i4 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = mul i4 [[X]], [[X]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i4 [[TMP1]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i4 [[TMP4]], [[X]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i4 [[TMP2]], [[X]] +; CHECK-NEXT: [[TMP10:%.*]] = mul i4 [[TMP3]], [[TMP2]] +; CHECK-NEXT: ret i4 [[TMP10]] +; %tmp1 = mul i4 %x, %x %tmp2 = mul i4 %tmp1, %x %tmp3 = mul i4 %tmp2, %x @@ -163,10 +210,14 @@ define i4 @foo4x11(i4 %x) { define i4 @foo4x12(i4 %x) { ; Can be done with two multiplies. -; CHECK-LABEL: @foo4x12( -; CHECK-NEXT: mul -; CHECK-NEXT: mul -; CHECK-NEXT: ret +; CHECK-LABEL: define i4 @foo4x12( +; CHECK-SAME: i4 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = mul i4 [[X]], [[X]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i4 [[TMP1]], [[X]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i4 [[TMP4]], [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i4 [[TMP3]], [[TMP3]] +; CHECK-NEXT: ret i4 [[TMP2]] +; %tmp1 = mul i4 %x, %x %tmp2 = mul i4 %tmp1, %x %tmp3 = mul i4 %tmp2, %x @@ -183,11 +234,15 @@ define i4 @foo4x12(i4 %x) { define i4 @foo4x13(i4 %x) { ; Can be done with three multiplies. -; CHECK-LABEL: @foo4x13( -; CHECK-NEXT: mul -; CHECK-NEXT: mul -; CHECK-NEXT: mul -; CHECK-NEXT: ret +; CHECK-LABEL: define i4 @foo4x13( +; CHECK-SAME: i4 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = mul i4 [[X]], [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i4 [[TMP1]], [[X]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i4 [[TMP2]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i4 [[TMP3]], [[X]] +; CHECK-NEXT: [[TMP12:%.*]] = mul i4 [[TMP4]], [[TMP3]] +; CHECK-NEXT: ret i4 [[TMP12]] +; %tmp1 = mul i4 %x, %x %tmp2 = mul i4 %tmp1, %x %tmp3 = mul i4 %tmp2, %x @@ -205,11 +260,15 @@ define i4 @foo4x13(i4 %x) { define i4 @foo4x14(i4 %x) { ; Can be done with three multiplies. 
-; CHECK-LABEL: @foo4x14( -; CHECK-NEXT: mul -; CHECK-NEXT: mul -; CHECK-NEXT: mul -; CHECK-NEXT: ret +; CHECK-LABEL: define i4 @foo4x14( +; CHECK-SAME: i4 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = mul i4 [[X]], [[X]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i4 [[TMP1]], [[X]] +; CHECK-NEXT: [[TMP5:%.*]] = mul i4 [[TMP4]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = mul i4 [[TMP5]], [[X]] +; CHECK-NEXT: [[TMP7:%.*]] = mul i4 [[TMP6]], [[TMP6]] +; CHECK-NEXT: ret i4 [[TMP7]] +; %tmp1 = mul i4 %x, %x %tmp2 = mul i4 %tmp1, %x %tmp3 = mul i4 %tmp2, %x @@ -228,12 +287,16 @@ define i4 @foo4x14(i4 %x) { define i4 @foo4x15(i4 %x) { ; Can be done with four multiplies. -; CHECK-LABEL: @foo4x15( -; CHECK-NEXT: mul -; CHECK-NEXT: mul -; CHECK-NEXT: mul -; CHECK-NEXT: mul -; CHECK-NEXT: ret +; CHECK-LABEL: define i4 @foo4x15( +; CHECK-SAME: i4 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = mul i4 [[X]], [[X]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i4 [[TMP1]], [[X]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i4 [[TMP4]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = mul i4 [[TMP3]], [[X]] +; CHECK-NEXT: [[TMP5:%.*]] = mul i4 [[TMP6]], [[X]] +; CHECK-NEXT: [[TMP14:%.*]] = mul i4 [[TMP5]], [[TMP6]] +; CHECK-NEXT: ret i4 [[TMP14]] +; %tmp1 = mul i4 %x, %x %tmp2 = mul i4 %tmp1, %x %tmp3 = mul i4 %tmp2, %x diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-hot-callsite-threshold.prof b/llvm/test/Transforms/SampleProfile/Inputs/inline-hot-callsite-threshold.prof new file mode 100644 index 0000000000000..d1c0408210f49 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-hot-callsite-threshold.prof @@ -0,0 +1,3 @@ +foo:100:100 + 1: bar:100 + 1:100 diff --git a/llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll b/llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll new file mode 100644 index 0000000000000..914ab4f1e3da5 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll @@ -0,0 +1,61 @@ +; RUN: opt < %s -passes=sample-profile 
-sample-profile-file=%S/Inputs/inline-hot-callsite-threshold.prof -S -pass-remarks=sample-profile -sample-profile-hot-inline-threshold=100 2>&1 | FileCheck %s + +; CHECK: remark: a.cc:6:12: 'bar' inlined into 'foo' to match profiling context with (cost={{.*}}, threshold=100) +; CHECK: define dso_local noundef i32 @foo(i32 noundef %0) +; CHECK-NOT: %2 = tail call noundef i32 @bar(i32 noundef %0) +; CHECK-NEXT: %2 = icmp sgt i32 %0, 1 +; CHECK-NEXT: br i1 %2, label %3, label %bar.exit + +; Manually lower the cost threshold for hot function inlining, so that the +; function is not inlined even if the profile indicates it is hot. +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-hot-callsite-threshold.prof -S -pass-remarks=sample-profile -sample-profile-hot-inline-threshold=1 2>&1 | FileCheck %s --check-prefix=COST + +; COST-NOT: remark +; COST: define dso_local noundef i32 @foo(i32 noundef %0) +; COST-NEXT: %2 = tail call noundef i32 @bar(i32 noundef %0) + +define dso_local noundef i32 @bar(i32 noundef %0) #0 !dbg !10 { + %2 = icmp sgt i32 %0, 1 + br i1 %2, label %3, label %15 +3: ; preds = %1 + %4 = add nsw i32 %0, -2 + %5 = mul i32 %4, %4 + %6 = add i32 %5, %0 + %7 = zext nneg i32 %4 to i33 + %8 = add nsw i32 %0, -3 + %9 = zext i32 %8 to i33 + %10 = mul i33 %7, %9 + %11 = lshr i33 %10, 1 + %12 = trunc nuw i33 %11 to i32 + %13 = xor i32 %12, -1 + %14 = add i32 %6, %13 + br label %15 +15: ; preds = %3, %1 + %16 = phi i32 [ 0, %1 ], [ %14, %3 ] + ret i32 %16 +} + +define dso_local noundef i32 @foo(i32 noundef %0) #1 !dbg !20 { + %2 = tail call noundef i32 @bar(i32 noundef %0), !dbg !24 + ret i32 %2 +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "use-sample-profile" } +attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "use-sample-profile" } +attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + +!llvm.dbg.cu = !{!0} 
+!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug) +!1 = !DIFile(filename: "a.cc", directory: ".") +!2 = !{i32 2, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!10 = distinct !DISubprogram(name: "bar", linkageName: "bar", scope: !1, file: !1, line: 1, type: !12, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0) +!11 = !DIFile(filename: "a.cc", directory: ".") +!12 = !DISubroutineType(types: !13) +!13 = !{!14, !14} +!14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!20 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: !11, file: !11, line: 5, type: !12, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0) +!23 = !DILocation(line: 0, scope: !20) +!24 = !DILocation(line: 6, column: 12, scope: !20) diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll index 18cbd857d97bb..2cd9abf0e11e9 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll @@ -98,7 +98,7 @@ if.end: ;YAML-NEXT: - String: '(cost=' ;YAML-NEXT: - Cost: '15' ;YAML-NEXT: - String: ', threshold=' -;YAML-NEXT: - Threshold: '2147483647' +;YAML-NEXT: - Threshold: '3000' ;YAML-NEXT: - String: ')' ;YAML-NEXT: - String: ' at callsite ' ;YAML-NEXT: - String: foo diff --git a/llvm/test/Transforms/SampleProfile/remarks.ll b/llvm/test/Transforms/SampleProfile/remarks.ll index 997e02bb5b544..9c0143ae65ca7 100644 --- a/llvm/test/Transforms/SampleProfile/remarks.ll +++ b/llvm/test/Transforms/SampleProfile/remarks.ll @@ -22,7 +22,7 @@ ; We are expecting foo() to be inlined in main() (almost all the cycles are ; spent inside foo). 
-; CHECK: remark: remarks.cc:13:21: '_Z3foov' inlined into 'main' to match profiling context with (cost=130, threshold=2147483647) at callsite main:0:21; +; CHECK: remark: remarks.cc:13:21: '_Z3foov' inlined into 'main' to match profiling context with (cost=130, threshold=3000) at callsite main:0:21; ; CHECK: remark: remarks.cc:9:19: 'rand' inlined into 'main' to match profiling context with (cost=always): always inline attribute at callsite _Z3foov:6:19 @ main:0:21; ; The back edge for the loop is the hottest edge in the loop subgraph. @@ -51,7 +51,7 @@ ;YAML-NEXT: - String: '(cost=' ;YAML-NEXT: - Cost: '130' ;YAML-NEXT: - String: ', threshold=' -;YAML-NEXT: - Threshold: '2147483647' +;YAML-NEXT: - Threshold: '3000' ;YAML-NEXT: - String: ')' ;YAML-NEXT: - String: ' at callsite ' ;YAML-NEXT: - String: main diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll index 5cbda8a1e112e..c2e9be5688967 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll @@ -15,9 +15,7 @@ define <8 x i8> @trivial(<8 x i8> %a) { define <4 x i32> @add_same_operands(<4 x i32> %x) { ; CHECK-LABEL: @add_same_operands( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[SHUF]], [[SHUF]] -; CHECK-NEXT: [[REVSHUF:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[REVSHUF:%.*]] = add <4 x i32> [[X:%.*]], [[X]] ; CHECK-NEXT: ret <4 x i32> [[REVSHUF]] ; %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> @@ -364,8 +362,7 @@ define <8 x i8> @inner_shuffle(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { define <4 x i32> @extrause_add_same_operands(<4 x i32> %x) { ; CHECK-LABEL: @extrause_add_same_operands( ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; 
CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[SHUF]], [[SHUF]] -; CHECK-NEXT: [[REVSHUF:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[REVSHUF:%.*]] = add <4 x i32> [[X]], [[X]] ; CHECK-NEXT: [[ADD2:%.*]] = add <4 x i32> [[SHUF]], [[REVSHUF]] ; CHECK-NEXT: ret <4 x i32> [[ADD2]] ; @@ -513,9 +510,7 @@ define <8 x half> @fma(<8 x half> %a, <8 x half> %b, <8 x half> %c) { define <4 x i64> @single_zext(<4 x i32> %x) { ; CHECK-LABEL: @single_zext( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[ZEXT:%.*]] = zext <4 x i32> [[SHUF]] to <4 x i64> -; CHECK-NEXT: [[REVSHUF:%.*]] = shufflevector <4 x i64> [[ZEXT]], <4 x i64> poison, <4 x i32> +; CHECK-NEXT: [[REVSHUF:%.*]] = zext <4 x i32> [[X:%.*]] to <4 x i64> ; CHECK-NEXT: ret <4 x i64> [[REVSHUF]] ; %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> @@ -570,19 +565,10 @@ define <8 x i16> @not_bitcast2(<4 x i32> %x, <8 x i16> %y) { define void @exttrunc(<8 x i32> %a, <8 x i32> %b, ptr %p) { ; CHECK-LABEL: @exttrunc( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[AB1:%.*]] = zext <4 x i32> [[AB]] to <4 x i64> -; CHECK-NEXT: [[AT1:%.*]] = zext <4 x i32> [[AT]] to <4 x i64> -; CHECK-NEXT: [[BB1:%.*]] = sext <4 x i32> [[BB]] to <4 x i64> -; CHECK-NEXT: [[BT1:%.*]] = sext <4 x i32> [[BT]] to <4 x i64> -; CHECK-NEXT: [[ABB:%.*]] = add <4 x i64> [[AB1]], [[BB1]] -; CHECK-NEXT: [[ABT:%.*]] = add <4 x i64> [[AT1]], [[BT1]] -; CHECK-NEXT: [[ABB1:%.*]] = trunc <4 x i64> [[ABB]] to <4 x i32> -; CHECK-NEXT: [[ABT1:%.*]] = trunc <4 x i64> [[ABT]] to <4 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> 
[[ABB1]], <4 x i32> [[ABT1]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i32> [[B:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[R:%.*]] = trunc <8 x i64> [[TMP3]] to <8 x i32> ; CHECK-NEXT: store <8 x i32> [[R]], ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret void ; @@ -605,17 +591,9 @@ define void @exttrunc(<8 x i32> %a, <8 x i32> %b, ptr %p) { define void @zext(<8 x i16> %a, <8 x i16> %b, ptr %p) { ; CHECK-LABEL: @zext( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[AB1:%.*]] = zext <4 x i16> [[AB]] to <4 x i32> -; CHECK-NEXT: [[AT1:%.*]] = zext <4 x i16> [[AT]] to <4 x i32> -; CHECK-NEXT: [[BB1:%.*]] = zext <4 x i16> [[BB]] to <4 x i32> -; CHECK-NEXT: [[BT1:%.*]] = zext <4 x i16> [[BT]] to <4 x i32> -; CHECK-NEXT: [[ABB:%.*]] = add <4 x i32> [[AB1]], [[BB1]] -; CHECK-NEXT: [[ABT:%.*]] = add <4 x i32> [[AT1]], [[BT1]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[ABB]], <4 x i32> [[ABT]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i16> [[A:%.*]] to <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[B:%.*]] to <8 x i32> +; CHECK-NEXT: [[R:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: store <8 x i32> [[R]], ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret void ; @@ -636,17 +614,9 @@ define void @zext(<8 x i16> %a, <8 x i16> %b, ptr %p) { define void @sext(<8 x i16> %a, <8 x i16> %b, ptr %p) { ; CHECK-LABEL: @sext( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: 
[[BB:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[AB1:%.*]] = sext <4 x i16> [[AB]] to <4 x i32> -; CHECK-NEXT: [[AT1:%.*]] = sext <4 x i16> [[AT]] to <4 x i32> -; CHECK-NEXT: [[BB1:%.*]] = sext <4 x i16> [[BB]] to <4 x i32> -; CHECK-NEXT: [[BT1:%.*]] = sext <4 x i16> [[BT]] to <4 x i32> -; CHECK-NEXT: [[ABB:%.*]] = add <4 x i32> [[AB1]], [[BB1]] -; CHECK-NEXT: [[ABT:%.*]] = add <4 x i32> [[AT1]], [[BT1]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[ABB]], <4 x i32> [[ABT]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i16> [[B:%.*]] to <8 x i32> +; CHECK-NEXT: [[R:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: store <8 x i32> [[R]], ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret void ; @@ -705,11 +675,7 @@ define void @zext_types(<8 x i16> %a, <8 x i32> %b, ptr %p) { define void @trunc(<8 x i64> %a, <8 x i64> %b, ptr %p) { ; CHECK-LABEL: @trunc( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i64> [[A:%.*]], <8 x i64> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i64> [[A]], <8 x i64> poison, <4 x i32> -; CHECK-NEXT: [[ABB1:%.*]] = trunc <4 x i64> [[AB]] to <4 x i32> -; CHECK-NEXT: [[ABT1:%.*]] = trunc <4 x i64> [[AT]] to <4 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[ABB1]], <4 x i32> [[ABT1]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = trunc <8 x i64> [[A:%.*]] to <8 x i32> ; CHECK-NEXT: store <8 x i32> [[R]], ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret void ; @@ -724,10 +690,8 @@ define void @trunc(<8 x i64> %a, <8 x i64> %b, ptr %p) { define <4 x i64> @zext_chain(<4 x i16> %x) { ; CHECK-LABEL: @zext_chain( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i16> [[X:%.*]], <4 x i16> poison, <4 x i32> -; CHECK-NEXT: [[ZEXT:%.*]] = zext <4 x i16> [[SHUF]] to <4 x i32> -; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i32> [[ZEXT]] to <4 x 
i64> -; CHECK-NEXT: [[REVSHUF:%.*]] = shufflevector <4 x i64> [[SEXT]], <4 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i16> [[X:%.*]] to <4 x i32> +; CHECK-NEXT: [[REVSHUF:%.*]] = sext <4 x i32> [[TMP1]] to <4 x i64> ; CHECK-NEXT: ret <4 x i64> [[REVSHUF]] ; %shuf = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> @@ -928,13 +892,11 @@ entry: define <4 x i8> @singleop(<4 x i8> %a, <4 x i8> %b) { ; CHECK-LABEL: @singleop( -; CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i8> [[A:%.*]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[B1:%.*]] = shufflevector <4 x i8> [[B:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[A2:%.*]] = zext <4 x i8> [[A1]] to <4 x i16> -; CHECK-NEXT: [[B2:%.*]] = zext <4 x i8> [[B1]] to <4 x i16> -; CHECK-NEXT: [[AB:%.*]] = add <4 x i16> [[A2]], [[B2]] -; CHECK-NEXT: [[T:%.*]] = trunc <4 x i16> [[AB]] to <4 x i8> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[T]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[B:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[A:%.*]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[R:%.*]] = trunc <4 x i16> [[TMP4]] to <4 x i8> ; CHECK-NEXT: ret <4 x i8> [[R]] ; %a1 = shufflevector <4 x i8> %a, <4 x i8> poison, <4 x i32> diff --git a/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll b/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll index bb370a6d1dfeb..7f7790cecb0eb 100644 --- a/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll +++ b/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll @@ -670,7 +670,7 @@ declare void @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1) define amdgpu_cs void @test_buffer_atomic_fadd(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %offset, i1 %slc) { ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i1 %slc - ; CHECK-ENXT: 
call void @llvm.amdgcn.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %offset, i1 %slc) + ; CHECK-NEXT: call void @llvm.amdgcn.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %offset, i1 %slc) call void @llvm.amdgcn.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %offset, i1 %slc) ret void } diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index affd87b98c141..fe1262893212f 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -617,3 +617,6 @@ def have_ld64_plugin_support(): # "OBJECT_MODE" to 'any' by default on AIX OS. if "system-aix" in config.available_features: config.environment["OBJECT_MODE"] = "any" + +if config.has_logf128: + config.available_features.add("has_logf128") diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in index 60a68b0edaf93..0968f6214772d 100644 --- a/llvm/test/lit.site.cfg.py.in +++ b/llvm/test/lit.site.cfg.py.in @@ -63,6 +63,7 @@ config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@ config.spirv_tools_tests = @LLVM_INCLUDE_SPIRV_TOOLS_TESTS@ config.have_vc_rev = @LLVM_APPEND_VC_REV@ config.force_vc_rev = "@LLVM_FORCE_VC_REVISION@" +config.has_logf128 = @LLVM_HAS_LOGF128@ import lit.llvm lit.llvm.initialize(lit_config, config) diff --git a/llvm/test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s b/llvm/test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s index 653f544e36ce2..1db28a84e2ff6 100644 --- a/llvm/test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s +++ b/llvm/test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s @@ -10,4 +10,4 @@ CHECK-NEXT: config: '' CHECK-NEXT: register_initial_values: CHECK-DAG: - '[[REG2]]=0x0' # We don't check REG3 because in the case that REG2=REG3 the check would fail -CHECK-LAST: ... +CHECK-DAG: ... 
diff --git a/llvm/test/tools/llvm-exegesis/Mips/latency-GPR64.s b/llvm/test/tools/llvm-exegesis/Mips/latency-GPR64.s index f9b4860c3f4a0..cc2cf20ce05f4 100644 --- a/llvm/test/tools/llvm-exegesis/Mips/latency-GPR64.s +++ b/llvm/test/tools/llvm-exegesis/Mips/latency-GPR64.s @@ -9,4 +9,4 @@ CHECK-NEXT: AND64 CHECK-NEXT: config: '' CHECK-NEXT: register_initial_values: CHECK-DAG: - '[[REG1:[A-Z0-9]+_64]]=0x0' -CHECK-LAST: ... +CHECK-DAG: ... diff --git a/llvm/test/tools/llvm-exegesis/Mips/latency-by-opcode-name.s b/llvm/test/tools/llvm-exegesis/Mips/latency-by-opcode-name.s index f3853eaa62ea7..dcbbd3cf7fc35 100644 --- a/llvm/test/tools/llvm-exegesis/Mips/latency-by-opcode-name.s +++ b/llvm/test/tools/llvm-exegesis/Mips/latency-by-opcode-name.s @@ -9,4 +9,4 @@ CHECK-NEXT: ADD CHECK-NEXT: config: '' CHECK-NEXT: register_initial_values: CHECK-DAG: - '[[REG1:[A-Z0-9]+]]=0x0' -CHECK-LAST: ... +CHECK-DAG: ... diff --git a/llvm/test/tools/llvm-exegesis/PowerPC/latency-by-opcode-name.s b/llvm/test/tools/llvm-exegesis/PowerPC/latency-by-opcode-name.s index 3d457aeb59276..c4d9fcf2e0613 100644 --- a/llvm/test/tools/llvm-exegesis/PowerPC/latency-by-opcode-name.s +++ b/llvm/test/tools/llvm-exegesis/PowerPC/latency-by-opcode-name.s @@ -8,4 +8,4 @@ CHECK-NEXT: ADD8 CHECK-NEXT: config: '' CHECK-NEXT: register_initial_values: CHECK-DAG: - '[[REG1:[A-Z0-9]+]]=0x0' -CHECK-LAST: ... +CHECK-DAG: ... diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/latency-CMOV32rr.s b/llvm/test/tools/llvm-exegesis/X86/latency/latency-CMOV32rr.s index 9cdd9bf029d02..384f9f1d8cf9e 100644 --- a/llvm/test/tools/llvm-exegesis/X86/latency/latency-CMOV32rr.s +++ b/llvm/test/tools/llvm-exegesis/X86/latency/latency-CMOV32rr.s @@ -8,4 +8,4 @@ CHECK-NEXT: key: CHECK-NEXT: instructions: CHECK-NEXT: 'CMOV32rr {{.*}} i_0x{{[0-9a-f]}}' CHECK-NEXT: config: '' -CHECK-LAST: ... +CHECK-DAG: ... 
diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/latency-IN16rr.s b/llvm/test/tools/llvm-exegesis/X86/latency/latency-IN16rr.s index 8b4f42dd32015..c82f5c884b992 100644 --- a/llvm/test/tools/llvm-exegesis/X86/latency/latency-IN16rr.s +++ b/llvm/test/tools/llvm-exegesis/X86/latency/latency-IN16rr.s @@ -12,4 +12,4 @@ CHECK-NEXT: - {{.*}} CHECK-NEXT: config: '' CHECK-NEXT: register_initial_values: CHECK-DAG: - '[[REG1:[A-Z0-9]+]]=0x0' -CHECK-LAST: ... +CHECK-DAG: ... diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/latency-SBB8rr.s b/llvm/test/tools/llvm-exegesis/X86/latency/latency-SBB8rr.s index c20e687cf20d2..26c4391bc99d6 100644 --- a/llvm/test/tools/llvm-exegesis/X86/latency/latency-SBB8rr.s +++ b/llvm/test/tools/llvm-exegesis/X86/latency/latency-SBB8rr.s @@ -9,4 +9,4 @@ CHECK-NEXT: SBB8rr CHECK-NEXT: config: '' CHECK-NEXT: register_initial_values: CHECK-DAG: - '[[REG1:[A-Z0-9]+]]=0x0' -CHECK-LAST: ... +CHECK-DAG: ... diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/latency-SQRTSSr.s b/llvm/test/tools/llvm-exegesis/X86/latency/latency-SQRTSSr.s index 7e67a4343f4e6..bf97a40c4bf0d 100644 --- a/llvm/test/tools/llvm-exegesis/X86/latency/latency-SQRTSSr.s +++ b/llvm/test/tools/llvm-exegesis/X86/latency/latency-SQRTSSr.s @@ -10,4 +10,4 @@ CHECK-NEXT: SQRTSSr CHECK-NEXT: config: '' CHECK-NEXT: register_initial_values: CHECK-NOT: crashed -CHECK-LAST: ... +CHECK-DAG: ... diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/latency-by-opcode-name.s b/llvm/test/tools/llvm-exegesis/X86/latency/latency-by-opcode-name.s index 4fee6fe927097..08beccfe7704f 100644 --- a/llvm/test/tools/llvm-exegesis/X86/latency/latency-by-opcode-name.s +++ b/llvm/test/tools/llvm-exegesis/X86/latency/latency-by-opcode-name.s @@ -9,4 +9,4 @@ CHECK-NEXT: ADD32rr CHECK-NEXT: config: '' CHECK-NEXT: register_initial_values: CHECK-DAG: - '[[REG1:[A-Z0-9]+]]=0x0' -CHECK-LAST: ... +CHECK-DAG: ... 
diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/max-configs.test b/llvm/test/tools/llvm-exegesis/X86/latency/max-configs.test index 382e742144ac4..f27101d896608 100644 --- a/llvm/test/tools/llvm-exegesis/X86/latency/max-configs.test +++ b/llvm/test/tools/llvm-exegesis/X86/latency/max-configs.test @@ -9,7 +9,7 @@ CHECK-NEXT: SBB8rr CHECK-NEXT: config: '' CHECK-NEXT: register_initial_values: CHECK-DAG: - '[[REG1:[A-Z0-9]+]]=0x0' -CHECK-LAST: ... +CHECK-DAG: ... CHECK1-NOT: SBB8rr @@ -21,4 +21,4 @@ CHECK2-NEXT: SBB8rr CHECK2-NEXT: config: '' CHECK2-NEXT: register_initial_values: CHECK2-DAG: - '[[REG1:[A-Z0-9]+]]=0x0' -CHECK2-LAST: ... +CHECK2-DAG: ... diff --git a/llvm/test/tools/llvm-exegesis/X86/lbr/mov-add.s b/llvm/test/tools/llvm-exegesis/X86/lbr/mov-add.s index af1662d93a744..2a8cc8e34450a 100644 --- a/llvm/test/tools/llvm-exegesis/X86/lbr/mov-add.s +++ b/llvm/test/tools/llvm-exegesis/X86/lbr/mov-add.s @@ -16,4 +16,4 @@ CHECK-NEXT: {{.*}} CHECK-NEXT: num_repetitions: 10000 CHECK-NEXT: measurements: CHECK-NEXT: {{.*}} value: 0.0001, per_snippet_value: 0.0002 {{.*}} -CHECK-LAST: ... +CHECK-DAG: ... diff --git a/llvm/test/tools/llvm-exegesis/X86/uops/uops-CMOV16rm-noreg-serialization.s b/llvm/test/tools/llvm-exegesis/X86/uops/uops-CMOV16rm-noreg-serialization.s index 302c2b0ee722b..1e673e806da21 100644 --- a/llvm/test/tools/llvm-exegesis/X86/uops/uops-CMOV16rm-noreg-serialization.s +++ b/llvm/test/tools/llvm-exegesis/X86/uops/uops-CMOV16rm-noreg-serialization.s @@ -8,4 +8,4 @@ CHECK-YAML-NEXT: mode: uops CHECK-YAML-NEXT: key: CHECK-YAML-NEXT: instructions: CHECK-YAML-NEXT: - 'CMOV16rm {{[A-Z0-9]+}} {{[A-Z0-9]+}} {{[A-Z0-9]+}} i_0x1 %noreg i_0x0 %noreg i_0x{{[0-9a-f]}}' -CHECK-YAML-LAST: ... +CHECK-YAML-DAG: ... 
diff --git a/llvm/test/tools/llvm-profdata/memprof-merge-v0.test b/llvm/test/tools/llvm-profdata/memprof-merge-versions.test similarity index 77% rename from llvm/test/tools/llvm-profdata/memprof-merge-v0.test rename to llvm/test/tools/llvm-profdata/memprof-merge-versions.test index 28f65e0781bc6..aa7d0329425dc 100644 --- a/llvm/test/tools/llvm-profdata/memprof-merge-v0.test +++ b/llvm/test/tools/llvm-profdata/memprof-merge-versions.test @@ -19,6 +19,12 @@ RUN: llvm-profdata show %t.prof.v2 | FileCheck %s RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=2 --memprof-full-schema --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v2 RUN: llvm-profdata show %t.prof.v2 | FileCheck %s +RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=3 --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v3 +RUN: llvm-profdata show %t.prof.v3 | FileCheck %s + +RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=3 --memprof-full-schema --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v3 +RUN: llvm-profdata show %t.prof.v3 | FileCheck %s + For now we only check the validity of the instrumented profile since we don't have a way to display the contents of the memprof indexed format yet. 
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 28c3afa101647..fae6d1e989ab5 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -306,7 +306,8 @@ cl::opt MemProfVersionRequested( cl::init(memprof::Version0), cl::values(clEnumValN(memprof::Version0, "0", "version 0"), clEnumValN(memprof::Version1, "1", "version 1"), - clEnumValN(memprof::Version2, "2", "version 2"))); + clEnumValN(memprof::Version2, "2", "version 2"), + clEnumValN(memprof::Version3, "3", "version 3"))); cl::opt MemProfFullSchema( "memprof-full-schema", cl::Hidden, cl::sub(MergeSubcommand), diff --git a/llvm/utils/TableGen/ARMTargetDefEmitter.cpp b/llvm/utils/TableGen/ARMTargetDefEmitter.cpp index b79458529623f..5efa7d2722d3f 100644 --- a/llvm/utils/TableGen/ARMTargetDefEmitter.cpp +++ b/llvm/utils/TableGen/ARMTargetDefEmitter.cpp @@ -116,6 +116,24 @@ static void EmitARMTargetDef(RecordKeeper &RK, raw_ostream &OS) { << "#endif // EMIT_EXTENSIONS\n" << "\n"; + // Emit extension dependencies + OS << "#ifdef EMIT_EXTENSION_DEPENDENCIES\n" + << "inline constexpr ExtensionDependency ExtensionDependencies[] = {\n"; + for (const Record *Rec : SortedExtensions) { + auto LaterAEK = Rec->getValueAsString("ArchExtKindSpelling").upper(); + for (const Record *I : Rec->getValueAsListOfDefs("Implies")) + if (auto EarlierAEK = I->getValueAsOptionalString("ArchExtKindSpelling")) + OS << " {" << EarlierAEK->upper() << ", " << LaterAEK << "},\n"; + } + // FIXME: Tablegen has the Subtarget Feature FeatureRCPC_IMMO which is implied + // by FeatureRCPC3 and in turn implies FeatureRCPC. The proper fix is to make + // FeatureRCPC_IMMO an Extension but that will expose it to the command line. 
+ OS << " {AEK_RCPC, AEK_RCPC3},\n"; + OS << "};\n" + << "#undef EMIT_EXTENSION_DEPENDENCIES\n" + << "#endif // EMIT_EXTENSION_DEPENDENCIES\n" + << "\n"; + // Emit architecture information OS << "#ifdef EMIT_ARCHITECTURES\n"; diff --git a/llvm/utils/TableGen/X86InstrMappingEmitter.cpp b/llvm/utils/TableGen/X86InstrMappingEmitter.cpp index a8970d8bcbacd..d89a1f078328b 100644 --- a/llvm/utils/TableGen/X86InstrMappingEmitter.cpp +++ b/llvm/utils/TableGen/X86InstrMappingEmitter.cpp @@ -277,8 +277,22 @@ void X86InstrMappingEmitter::emitNFTransformTable( if (Pos == std::string::npos) continue; - if (auto *NewRec = Records.getDef(Name.erase(Pos, 3))) + if (auto *NewRec = Records.getDef(Name.erase(Pos, 3))) { +#ifndef NDEBUG + auto ClobberEFLAGS = [](const Record *R) { + return llvm::any_of( + R->getValueAsListOfDefs("Defs"), + [](const Record *Def) { return Def->getName() == "EFLAGS"; }); + }; + if (ClobberEFLAGS(Rec)) + report_fatal_error("EFLAGS should not be clobbered by " + + Rec->getName()); + if (!ClobberEFLAGS(NewRec)) + report_fatal_error("EFLAGS should be clobbered by " + + NewRec->getName()); +#endif Table.push_back(std::pair(&Target.getInstruction(NewRec), Inst)); + } } printTable(Table, "X86NFTransformTable", "GET_X86_NF_TRANSFORM_TABLE", OS); } diff --git a/llvm/utils/TableGen/X86ManualCompressEVEXTables.def b/llvm/utils/TableGen/X86ManualCompressEVEXTables.def index 665a394f57a6a..cab601bf8131f 100644 --- a/llvm/utils/TableGen/X86ManualCompressEVEXTables.def +++ b/llvm/utils/TableGen/X86ManualCompressEVEXTables.def @@ -48,6 +48,14 @@ NOCOMP(VPSRAQZ256ri) NOCOMP(VPSRAQZ256rm) NOCOMP(VPSRAQZ256rr) NOCOMP(VSCALEFPSZ256rm) +// When condition evaluates to false, the destination register is zeroed for +// nonNDD CFCMOV but not for NDD CFCMOV. 
+NOCOMP(CFCMOV16rm_ND) +NOCOMP(CFCMOV16rr_ND) +NOCOMP(CFCMOV32rm_ND) +NOCOMP(CFCMOV32rr_ND) +NOCOMP(CFCMOV64rm_ND) +NOCOMP(CFCMOV64rr_ND) #undef NOCOMP #ifndef ENTRY diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index b642b2c82e6d8..8a5f6d1908784 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -289,7 +289,6 @@ if (current_toolchain == default_toolchain) { "__atomic/kill_dependency.h", "__atomic/memory_order.h", "__atomic/to_gcc_order.h", - "__availability", "__bit/bit_cast.h", "__bit/bit_ceil.h", "__bit/bit_floor.h", @@ -384,6 +383,11 @@ if (current_toolchain == default_toolchain) { "__concepts/totally_ordered.h", "__condition_variable/condition_variable.h", "__config", + "__configuration/abi.h", + "__configuration/availability.h", + "__configuration/compiler.h", + "__configuration/language.h", + "__configuration/platform.h", "__coroutine/coroutine_handle.h", "__coroutine/coroutine_traits.h", "__coroutine/noop_coroutine_handle.h", diff --git a/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn index c99c1b5483355..f0bf6a8f3dbaf 100644 --- a/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn @@ -40,6 +40,8 @@ target(liblldb_type, "liblldb") { include_dirs = [ ".." 
] sources = [ "SBAddress.cpp", + "SBAddressRange.cpp", + "SBAddressRangeList.cpp", "SBAttachInfo.cpp", "SBBlock.cpp", "SBBreakpoint.cpp", diff --git a/llvm/utils/gn/secondary/lldb/source/Core/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Core/BUILD.gn index 30a9fb3ecceaa..0c9632a0a1915 100644 --- a/llvm/utils/gn/secondary/lldb/source/Core/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Core/BUILD.gn @@ -45,6 +45,7 @@ static_library("Core") { sources = [ "Address.cpp", "AddressRange.cpp", + "AddressRangeListImpl.cpp", "AddressResolver.cpp", "AddressResolverFileLine.cpp", "Communication.cpp", diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn index e93130eacdc74..d8266fee05014 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn @@ -318,6 +318,7 @@ write_cmake_config("llvm-config") { "LLVM_ENABLE_ZSTD=", "LLVM_FORCE_USE_OLD_TOOLCHAIN=", "LLVM_HAS_ATOMICS=1", + "LLVM_HAS_LOGF128=", "LLVM_HAVE_TFLITE=", "LLVM_HOST_TRIPLE=$llvm_current_triple", "LLVM_NATIVE_ARCH=$native_target", diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn index 78a9d20812ef9..8264f6d73e791 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn @@ -12,9 +12,9 @@ tablegen("X86GenDAGISel") { td_file = "X86.td" } -tablegen("X86GenCompressEVEXTables") { +tablegen("X86GenInstrMapping") { visibility = [ ":LLVMX86CodeGen" ] - args = [ "-gen-x86-compress-evex-tables" ] + args = [ "-gen-x86-instr-mapping" ] td_file = "X86.td" } @@ -48,11 +48,11 @@ tablegen("X86GenRegisterBank") { static_library("LLVMX86CodeGen") { deps = [ ":X86GenCallingConv", - ":X86GenCompressEVEXTables", ":X86GenDAGISel", ":X86GenFastISel", ":X86GenFoldTables", ":X86GenGlobalISel", + ":X86GenInstrMapping", ":X86GenRegisterBank", 
"MCTargetDesc", "TargetInfo", diff --git a/llvm/utils/gn/secondary/llvm/test/BUILD.gn b/llvm/utils/gn/secondary/llvm/test/BUILD.gn index 826dcf4e6ee9b..60d6d7b8c3ce7 100644 --- a/llvm/utils/gn/secondary/llvm/test/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/test/BUILD.gn @@ -64,6 +64,7 @@ write_lit_config("lit_site_cfg") { "LLVM_ENABLE_HTTPLIB=0", "LLVM_ENABLE_ZSTD=0", "LLVM_FORCE_VC_REVISION=", + "LLVM_HAS_LOGF128=0", "LLVM_HAVE_OPT_VIEWER_MODULES=0", "LLVM_HOST_TRIPLE=$llvm_current_triple", "LLVM_INCLUDE_DXIL_TESTS=0", diff --git a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn index f3ae5b5899ac6..2e11d25767cd0 100644 --- a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn @@ -64,7 +64,7 @@ executable("llvm-tblgen") { "SearchableTableEmitter.cpp", "SubtargetEmitter.cpp", "WebAssemblyDisassemblerEmitter.cpp", - "X86CompressEVEXTablesEmitter.cpp", + "X86InstrMappingEmitter.cpp", "X86DisassemblerTables.cpp", "X86FoldTablesEmitter.cpp", "X86MnemonicTables.cpp", diff --git a/llvm/utils/vim/syntax/llvm.vim b/llvm/utils/vim/syntax/llvm.vim index d86e3d1ddbc27..905d696400ca3 100644 --- a/llvm/utils/vim/syntax/llvm.vim +++ b/llvm/utils/vim/syntax/llvm.vim @@ -150,6 +150,7 @@ syn keyword llvmKeyword \ preallocated \ private \ protected + \ ptrauth \ ptx_device \ ptx_kernel \ readnone diff --git a/mlir/include/mlir/Analysis/DataFlow/IntegerRangeAnalysis.h b/mlir/include/mlir/Analysis/DataFlow/IntegerRangeAnalysis.h index 8bd7cf880c6af..191c023fb642c 100644 --- a/mlir/include/mlir/Analysis/DataFlow/IntegerRangeAnalysis.h +++ b/mlir/include/mlir/Analysis/DataFlow/IntegerRangeAnalysis.h @@ -24,51 +24,6 @@ namespace mlir { namespace dataflow { -/// This lattice value represents the integer range of an SSA value. 
-class IntegerValueRange { -public: - /// Create a maximal range ([0, uint_max(t)] / [int_min(t), int_max(t)]) - /// range that is used to mark the value as unable to be analyzed further, - /// where `t` is the type of `value`. - static IntegerValueRange getMaxRange(Value value); - - /// Create an integer value range lattice value. - IntegerValueRange(std::optional value = std::nullopt) - : value(std::move(value)) {} - - /// Whether the range is uninitialized. This happens when the state hasn't - /// been set during the analysis. - bool isUninitialized() const { return !value.has_value(); } - - /// Get the known integer value range. - const ConstantIntRanges &getValue() const { - assert(!isUninitialized()); - return *value; - } - - /// Compare two ranges. - bool operator==(const IntegerValueRange &rhs) const { - return value == rhs.value; - } - - /// Take the union of two ranges. - static IntegerValueRange join(const IntegerValueRange &lhs, - const IntegerValueRange &rhs) { - if (lhs.isUninitialized()) - return rhs; - if (rhs.isUninitialized()) - return lhs; - return IntegerValueRange{lhs.getValue().rangeUnion(rhs.getValue())}; - } - - /// Print the integer value range. - void print(raw_ostream &os) const { os << value; } - -private: - /// The known integer value range. - std::optional value; -}; - /// This lattice element represents the integer value range of an SSA value. /// When this lattice is updated, it automatically updates the constant value /// of the SSA value (if the range can be narrowed to one). 
diff --git a/mlir/include/mlir/Conversion/TosaToTensor/TosaToTensor.h b/mlir/include/mlir/Conversion/TosaToTensor/TosaToTensor.h index 3953c83f3aa10..76a4b1b156336 100644 --- a/mlir/include/mlir/Conversion/TosaToTensor/TosaToTensor.h +++ b/mlir/include/mlir/Conversion/TosaToTensor/TosaToTensor.h @@ -16,6 +16,7 @@ #include "mlir/Pass/Pass.h" namespace mlir { +class TypeConverter; #define GEN_PASS_DECL_TOSATOTENSOR #include "mlir/Conversion/Passes.h.inc" @@ -24,7 +25,8 @@ namespace tosa { std::unique_ptr createTosaToTensor(); -void populateTosaToTensorConversionPatterns(RewritePatternSet *patterns); +void populateTosaToTensorConversionPatterns(TypeConverter &converter, + RewritePatternSet *patterns); } // namespace tosa } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td index ead52332e8eec..81ed0f924a2e2 100644 --- a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td +++ b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td @@ -49,7 +49,7 @@ class Arith_BinaryOp traits = []> : // Base class for integer binary operations. class Arith_IntBinaryOp traits = []> : Arith_BinaryOp]>, + [DeclareOpInterfaceMethods]>, Arguments<(ins SignlessIntegerLike:$lhs, SignlessIntegerLike:$rhs)>, Results<(outs SignlessIntegerLike:$result)>; @@ -83,12 +83,25 @@ class Arith_FloatBinaryOp traits = []> : attr-dict `:` type($result) }]; } +// Checks that tensor input and outputs have identical shapes. This is stricter +// than the verification done in `SameOperandsAndResultShape` that allows for +// tensor dimensions to be 'compatible' (e.g., dynamic dimensions being +// compatible with static ones). +def SameInputOutputTensorDims : PredOpTrait< + "input and output have the same tensor dimensions", + AllMatchSameOperatorPred<["in", "out"], + "(::llvm::isa<::mlir::TensorType>($_self.getType()) ?" 
+ " ::llvm::cast<::mlir::TensorType>($_self.getType()).getShape() :" + " ::llvm::ArrayRef{})">>; + // Base class for arithmetic cast operations. Requires a single operand and -// result. If either is a shaped type, then the other must be of the same shape. +// result. If either is a shaped type, then the other must be of the same +// shape. In the case of tensor types, this also includes the corresponding +// operand/result dimensions being equal. class Arith_CastOp traits = []> : Arith_Op]>, + SameInputOutputTensorDims, DeclareOpInterfaceMethods]>, Arguments<(ins From:$in)>, Results<(outs To:$out)> { let assemblyFormat = "$in attr-dict `:` type($in) `to` type($out)"; @@ -107,7 +120,7 @@ class Arith_IToICastOp traits = []> : Arith_CastOp]>; + [DeclareOpInterfaceMethods]>; // Cast from an integer type to a floating point type. class Arith_IToFCastOp traits = []> : Arith_CastOp; @@ -139,7 +152,7 @@ class Arith_CompareOpOfAnyRank traits = []> : class Arith_IntBinaryOpWithOverflowFlags traits = []> : Arith_BinaryOp, + [Pure, DeclareOpInterfaceMethods, DeclareOpInterfaceMethods]>, Arguments<(ins SignlessIntegerLike:$lhs, SignlessIntegerLike:$rhs, DefaultValuedAttr< @@ -159,7 +172,7 @@ def Arith_ConstantOp : Op, AllTypesMatch<["value", "result"]>, - DeclareOpInterfaceMethods]> { + DeclareOpInterfaceMethods]> { let summary = "integer or floating point constant"; let description = [{ The `constant` operation produces an SSA value equal to some integer or @@ -1231,7 +1244,7 @@ def Arith_TruncIOp : Arith_IToICastOp<"trunci"> { def Arith_TruncFOp : Arith_Op<"truncf", - [Pure, SameOperandsAndResultShape, + [Pure, SameOperandsAndResultShape, SameInputOutputTensorDims, DeclareOpInterfaceMethods, DeclareOpInterfaceMethods]>, Arguments<(ins FloatLike:$in, @@ -1327,7 +1340,7 @@ def IndexCastTypeConstraint : TypeConstraint]> { + [DeclareOpInterfaceMethods]> { let summary = "cast between index and integer types"; let description = [{ Casts between scalar or vector integers and 
corresponding 'index' scalar or @@ -1346,7 +1359,7 @@ def Arith_IndexCastOp def Arith_IndexCastUIOp : Arith_CastOp<"index_castui", IndexCastTypeConstraint, IndexCastTypeConstraint, - [DeclareOpInterfaceMethods]> { + [DeclareOpInterfaceMethods]> { let summary = "unsigned cast between index and integer types"; let description = [{ Casts between scalar or vector integers and corresponding 'index' scalar or @@ -1400,7 +1413,7 @@ def Arith_BitcastOp : Arith_CastOp<"bitcast", BitcastTypeConstraint, def Arith_CmpIOp : Arith_CompareOpOfAnyRank<"cmpi", - [DeclareOpInterfaceMethods]> { + [DeclareOpInterfaceMethods]> { let summary = "integer comparison operation"; let description = [{ The `cmpi` operation is a generic comparison for integer-like types. Its two @@ -1555,7 +1568,7 @@ class ScalarConditionOrMatchingShape names> : def SelectOp : Arith_Op<"select", [Pure, AllTypesMatch<["true_value", "false_value", "result"]>, ScalarConditionOrMatchingShape<["condition", "result"]>, - DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods, ] # ElementwiseMappable.traits> { let summary = "select operation"; let description = [{ diff --git a/mlir/include/mlir/Dialect/Arith/Transforms/Passes.h b/mlir/include/mlir/Dialect/Arith/Transforms/Passes.h index cbc6147cb81e2..9dc262cc72ed0 100644 --- a/mlir/include/mlir/Dialect/Arith/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Arith/Transforms/Passes.h @@ -24,9 +24,6 @@ namespace arith { class WideIntEmulationConverter; class NarrowTypeEmulationConverter; -/// Create a pass to bufferize arith.constant ops. -std::unique_ptr createConstantBufferizePass(uint64_t alignment = 0); - /// Adds patterns to emulate wide Arith and Function ops over integer /// types into supported ones. This is done by splitting original power-of-two /// i2N integer types into two iN halves. 
diff --git a/mlir/include/mlir/Dialect/Arith/Transforms/Passes.td b/mlir/include/mlir/Dialect/Arith/Transforms/Passes.td index 4096e309199e9..550c5c0cf4f60 100644 --- a/mlir/include/mlir/Dialect/Arith/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Arith/Transforms/Passes.td @@ -11,22 +11,6 @@ include "mlir/Pass/PassBase.td" -def ArithBufferizePass : Pass<"arith-bufferize", "ModuleOp"> { - let summary = "Bufferize Arith dialect ops."; - let description = [{ - This pass bufferizes arith dialect ops. - - This pass needs to be a module pass because it inserts memref.global - ops into the module, which cannot be done safely from a function pass due to - multi-threading. Most other bufferization passes can run in parallel at - function granularity. - }]; - let options = [ - Option<"alignment", "alignment", "unsigned", /*default=*/"0", - "Create global memrefs with a specified alignment">, - ]; -} - def ArithExpandOpsPass : Pass<"arith-expand"> { let summary = "Legalize Arith ops to be convertible to LLVM."; let dependentDialects = ["vector::VectorDialect"]; diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h index 459c252b70712..e053e6c97e143 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h @@ -221,9 +221,6 @@ createPromoteBuffersToStackPass(std::function isSmallAlloc); /// insert_slice ops. std::unique_ptr createEmptyTensorEliminationPass(); -/// Create a pass that bufferizes ops from the bufferization dialect. 
-std::unique_ptr createBufferizationBufferizePass(); - //===----------------------------------------------------------------------===// // Registration //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td index 75ce85c9128c9..8f8826b9ad56b 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td @@ -350,11 +350,6 @@ def FinalizingBufferize : Pass<"finalizing-bufferize", "func::FuncOp"> { let constructor = "mlir::bufferization::createFinalizingBufferizePass()"; } -def BufferizationBufferize : Pass<"bufferization-bufferize", "func::FuncOp"> { - let summary = "Bufferize the `bufferization` dialect"; - let constructor = "mlir::bufferization::createBufferizationBufferizePass()"; -} - def DropEquivalentBufferResults : Pass<"drop-equivalent-buffer-results", "ModuleOp"> { let summary = "Remove MemRef return values that are equivalent to a bbArg"; let description = [{ diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td index 1da68ed2176d8..10719aae5c8b4 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td @@ -52,7 +52,7 @@ def GPU_DimensionAttr : EnumAttr; class GPU_IndexOp traits = []> : GPU_Op, + DeclareOpInterfaceMethods, DeclareOpInterfaceMethods])>, Arguments<(ins GPU_DimensionAttr:$dimension)>, Results<(outs Index)> { let assemblyFormat = "$dimension attr-dict"; @@ -144,7 +144,7 @@ def GPU_ThreadIdOp : GPU_IndexOp<"thread_id"> { } def GPU_LaneIdOp : GPU_Op<"lane_id", [ - Pure, DeclareOpInterfaceMethods]> { + Pure, DeclareOpInterfaceMethods]> { let description = [{ Returns the lane id within the subgroup (warp/wave). 
@@ -158,7 +158,7 @@ def GPU_LaneIdOp : GPU_Op<"lane_id", [ } def GPU_SubgroupIdOp : GPU_Op<"subgroup_id", [ - Pure, DeclareOpInterfaceMethods]>, + Pure, DeclareOpInterfaceMethods]>, Arguments<(ins)>, Results<(outs Index:$result)> { let description = [{ Returns the subgroup id, i.e., the index of the current subgroup within the @@ -190,7 +190,7 @@ def GPU_GlobalIdOp : GPU_IndexOp<"global_id"> { def GPU_NumSubgroupsOp : GPU_Op<"num_subgroups", [ - Pure, DeclareOpInterfaceMethods]>, + Pure, DeclareOpInterfaceMethods]>, Arguments<(ins)>, Results<(outs Index:$result)> { let description = [{ Returns the number of subgroups within a workgroup. @@ -206,7 +206,7 @@ def GPU_NumSubgroupsOp : GPU_Op<"num_subgroups", [ } def GPU_SubgroupSizeOp : GPU_Op<"subgroup_size", [ - Pure, DeclareOpInterfaceMethods]>, + Pure, DeclareOpInterfaceMethods]>, Arguments<(ins)>, Results<(outs Index:$result)> { let description = [{ Returns the number of threads within a subgroup. @@ -687,7 +687,7 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [ def GPU_LaunchOp : GPU_Op<"launch", [ AutomaticAllocationScope, AttrSizedOperandSegments, GPU_AsyncOpInterface, - DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods, RecursiveMemoryEffects]>, Arguments<(ins Variadic:$asyncDependencies, Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ, diff --git a/mlir/include/mlir/Dialect/Index/IR/IndexOps.td b/mlir/include/mlir/Dialect/Index/IR/IndexOps.td index c6079cb8a98c8..a30ae9f739cbc 100644 --- a/mlir/include/mlir/Dialect/Index/IR/IndexOps.td +++ b/mlir/include/mlir/Dialect/Index/IR/IndexOps.td @@ -25,7 +25,7 @@ include "mlir/IR/OpBase.td" /// Base class for Index dialect operations. 
class IndexOp traits = []> : Op] # traits>; + [DeclareOpInterfaceMethods] # traits>; //===----------------------------------------------------------------------===// // IndexBinaryOp diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td index 535cf8dfd2ced..bfcfbd64ae021 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td @@ -1037,4 +1037,25 @@ def LLVM_TargetFeaturesAttr : LLVM_Attr<"TargetFeatures", "target_features"> let genVerifyDecl = 1; } +//===----------------------------------------------------------------------===// +// UndefAttr +//===----------------------------------------------------------------------===// + +/// Folded into from LLVM::UndefOp. +def LLVM_UndefAttr : LLVM_Attr<"Undef", "undef">; + +//===----------------------------------------------------------------------===// +// PoisonAttr +//===----------------------------------------------------------------------===// + +/// Folded into from LLVM::PoisonOp. +def LLVM_PoisonAttr : LLVM_Attr<"Poison", "poison">; + +//===----------------------------------------------------------------------===// +// ZeroAttr +//===----------------------------------------------------------------------===// + +/// Folded into from LLVM::ZeroOp. 
+def LLVM_ZeroAttr : LLVM_Attr<"Zero", "zero">; + #endif // LLVMIR_ATTRDEFS diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index 84e67d2c11dbd..f6f907f39a4b4 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -1522,7 +1522,7 @@ def LLVM_NoneTokenOp let assemblyFormat = "attr-dict `:` type($res)"; } -def LLVM_UndefOp : LLVM_Op<"mlir.undef", [Pure]>, +def LLVM_UndefOp : LLVM_Op<"mlir.undef", [Pure, ConstantLike]>, LLVM_Builder<"$res = llvm::UndefValue::get($_resultType);"> { let summary = "Creates an undefined value of LLVM dialect type."; let description = [{ @@ -1541,9 +1541,10 @@ def LLVM_UndefOp : LLVM_Op<"mlir.undef", [Pure]>, let results = (outs LLVM_Type:$res); let builders = [LLVM_OneResultOpBuilder]; let assemblyFormat = "attr-dict `:` type($res)"; + let hasFolder = 1; } -def LLVM_PoisonOp : LLVM_Op<"mlir.poison", [Pure]>, +def LLVM_PoisonOp : LLVM_Op<"mlir.poison", [Pure, ConstantLike]>, LLVM_Builder<"$res = llvm::PoisonValue::get($_resultType);"> { let summary = "Creates a poison value of LLVM dialect type."; let description = [{ @@ -1563,10 +1564,11 @@ def LLVM_PoisonOp : LLVM_Op<"mlir.poison", [Pure]>, let results = (outs LLVM_Type:$res); let builders = [LLVM_OneResultOpBuilder]; let assemblyFormat = "attr-dict `:` type($res)"; + let hasFolder = 1; } def LLVM_ZeroOp - : LLVM_Op<"mlir.zero", [Pure]>, + : LLVM_Op<"mlir.zero", [Pure, ConstantLike]>, LLVM_Builder<"$res = llvm::Constant::getNullValue($_resultType);"> { let summary = "Creates a zero-initialized value of LLVM dialect type."; @@ -1588,6 +1590,7 @@ def LLVM_ZeroOp let builders = [LLVM_OneResultOpBuilder]; let assemblyFormat = "attr-dict `:` type($res)"; let hasVerifier = 1; + let hasFolder = 1; } def LLVM_ConstantOp diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml index 
eb7dd37010a67..fad234a9dcae9 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml @@ -3478,6 +3478,144 @@ structured_op: !LinalgStructuredOpConfig - !ScalarExpression scalar_arg: K --- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: conv_2d_ngchw_gfchw_q + cpp_class_name: Conv2DNgchwGfchwQOp + doc: |- + Performs 2-D grouped convolution with zero-point offsets. + + Layout: + * Input: NGCHW. + * Kernel: GFCHW. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. This includes the zero + point offsets common to quantized operations. + implements: + - LinalgConvolutionOpInterface +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + kind: input_tensor + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s0, s1, s2, s3 * s4 + s5 * s6, s7 * s8 + s9 * s10)> + - !LinalgOperandDefConfig + name: K + kind: input_tensor + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s1, s11, s2, s5, s9)> + - !LinalgOperandDefConfig + name: IZp + kind: scalar + type_var: I32 + - !LinalgOperandDefConfig + name: KZp + kind: scalar + type_var: I32 + - !LinalgOperandDefConfig + name: O + kind: output_tensor + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s0, s1, s11, s3, s7)> + - !LinalgOperandDefConfig + name: strides + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] + -> (s4, s8)> + default_indices: + - 1 + - 1 + - !LinalgOperandDefConfig + name: dilations + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] + -> (s6, s10)> + default_indices: + - 1 + - 1 + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - 
affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7, + s8, s9, s10, s11] -> (d0, d1, d5, d3 * s4 + d6 * s6, d4 * s8 + d7 * s10)> + - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7, + s8, s9, s10, s11] -> (d1, d2, d5, d6, d7)> + - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7, + s8, s9, s10, s11] -> ()> + - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7, + s8, s9, s10, s11] -> ()> + - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7, + s8, s9, s10, s11] -> (d0, d1, d2, d3, d4)> + iterator_types: + - parallel + - parallel + - parallel + - parallel + - parallel + - reduction + - reduction + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_fn: + kind: binary + fn_name: add + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_fn: + kind: binary + fn_name: mul + operands: + - !ScalarExpression + scalar_fn: + kind: binary + fn_name: sub + operands: + - !ScalarExpression + scalar_fn: + kind: type + fn_name: cast_signed + type_var: U + operands: + - !ScalarExpression + scalar_arg: I + - !ScalarExpression + scalar_fn: + kind: type + fn_name: cast_signed + type_var: U + operands: + - !ScalarExpression + scalar_arg: IZp + - !ScalarExpression + scalar_fn: + kind: binary + fn_name: sub + operands: + - !ScalarExpression + scalar_fn: + kind: type + fn_name: cast_signed + type_var: U + operands: + - !ScalarExpression + scalar_arg: K + - !ScalarExpression + scalar_fn: + kind: type + fn_name: cast_signed + type_var: U + operands: + - !ScalarExpression + scalar_arg: KZp +--- !LinalgOpConfig metadata: !LinalgOpMetadata name: conv_3d_ndhwc_dhwcf cpp_class_name: Conv3DNdhwcDhwcfOp diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h index d36d1e70f0b14..f2955d55e59ec 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.h +++ 
b/mlir/include/mlir/Dialect/Linalg/Passes.h @@ -22,10 +22,6 @@ namespace func { class FuncOp; } // namespace func -namespace bufferization { -struct OneShotBufferizationOptions; -} // namespace bufferization - #define GEN_PASS_DECL #include "mlir/Dialect/Linalg/Passes.h.inc" // IWYU pragma: keep diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td index 0a4ce8953136d..0621a9f33ba1e 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.td +++ b/mlir/include/mlir/Dialect/Linalg/Passes.td @@ -89,16 +89,6 @@ def LinalgInlineScalarOperandsPass : Pass<"linalg-inline-scalar-operands"> { ]; } -def LinalgBufferizePass : Pass<"linalg-bufferize"> { - let summary = "Bufferize the linalg dialect"; - let dependentDialects = [ - "affine::AffineDialect", - "bufferization::BufferizationDialect", - "linalg::LinalgDialect", - "memref::MemRefDialect", - ]; -} - def LinalgGeneralizeNamedOpsPass : Pass<"linalg-generalize-named-ops"> { let summary = "Convert named ops into generic ops"; let dependentDialects = ["linalg::LinalgDialect"]; diff --git a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h index cfb637f133f54..28e17459ff962 100644 --- a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h @@ -47,13 +47,6 @@ void populateShapeRewritePatterns(RewritePatternSet &patterns); void populateRemoveShapeConstraintsPatterns(RewritePatternSet &patterns); std::unique_ptr> createRemoveShapeConstraintsPass(); -// Bufferizes shape dialect ops. -// -// Note that most shape dialect ops must be converted to std before -// bufferization happens, as they are intended to be bufferized at the std -// level. -std::unique_ptr> createShapeBufferizePass(); - /// Outline the shape computation part by adding shape.func and populate /// conrresponding mapping infomation into ShapeMappingAnalysis. 
std::unique_ptr> createOutlineShapeComputationPass(); diff --git a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td index 9dfda9ea33615..83834509b4a35 100644 --- a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td @@ -103,11 +103,4 @@ def ShapeToShapeLowering : Pass<"shape-to-shape-lowering", "func::FuncOp"> { let constructor = "mlir::createShapeToShapeLowering()"; } -// TODO: Generalize this to allow any type conversions desired. -def ShapeBufferize : Pass<"shape-bufferize", "func::FuncOp"> { - let summary = "Bufferize the shape dialect."; - let constructor = "mlir::createShapeBufferizePass()"; - let dependentDialects = ["bufferization::BufferizationDialect", - "memref::MemRefDialect"]; -} #endif // MLIR_DIALECT_SHAPE_TRANSFORMS_PASSES diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h index bb49d6c256f21..d6d038ef65bdf 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h @@ -65,12 +65,6 @@ void populateSparseAssembler(RewritePatternSet &patterns, bool directOut); std::unique_ptr createSparseAssembler(); std::unique_ptr createSparseAssembler(bool directOut); -//===----------------------------------------------------------------------===// -// The SparseEncodingPropagation pass. -//===----------------------------------------------------------------------===// - -std::unique_ptr createSparseEncodingPropagationPass(); - //===----------------------------------------------------------------------===// // The SparseReinterpretMap pass. 
//===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td index 94c3ca60030ee..2f844cee5ff52 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td @@ -40,42 +40,6 @@ def SparseAssembler : Pass<"sparse-assembler", "ModuleOp"> { ]; } -def SparseEncodingPropagation : Pass<"sparse-encoding-propagation", "func::FuncOp"> { - let summary = "Propagate sparse tensor encodings"; - let description = [{ - A pass that propagates sparse tensor encodings. - - Background: To avoid introducing repetitive operations, sparse tensors - in MLIR try to reuse tensor operations whenever available. However, most - tensor operations are canonicalized/transformed without the knowledge - of sparsity. The pass tries to propagate missing sparse encodings. - - For example: - ```mlir - %s = tensor.extract_slice %input[0, 0,] [2, 1] [1, 1] - : tensor<2x3xf32, #sparse> to tensor<2x1xf32, #sparse> - - // After rank reducing (by tensor dialect transformation) - %t = tensor.extract_slice %input[0, 0,] [2, 1] [1, 1] - : tensor<2x3xf32, #sparse> to tensor<2xf32> - %s = tensor.expand_shape [[0, 1]] %t - : tensor<2xf32> to tensor<2x1xf32, #sparse> - - // After sparsity propagation - %t = tensor.extract_slice %input[0, 0,] [2, 1] [1, 1] - : tensor<2x3xf32, #sparse> to tensor<2xf32, #sparse1> - %s = tensor.expand_shape [[0, 1]] %t - : tensor<2xf32, #sparse1> to tensor<2x1xf32, #sparse> - ``` - }]; - - let constructor = "mlir::createSparseEncodingPropagationPass()"; - let dependentDialects = [ - "sparse_tensor::SparseTensorDialect", - "tensor::TensorDialect", - ]; -} - def SparseReinterpretMap : Pass<"sparse-reinterpret-map", "ModuleOp"> { let summary = "Reinterprets sparse tensor type mappings"; let description = [{ diff --git 
a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h index 48f9066934a25..964c35b3f15b8 100644 --- a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h @@ -21,9 +21,6 @@ namespace tensor { /// Creates an instance of the `tensor` subset folding pass. std::unique_ptr createFoldTensorSubsetOpsPass(); -/// Creates an instance of the `tensor` dialect bufferization pass. -std::unique_ptr createTensorBufferizePass(); - //===----------------------------------------------------------------------===// // Registration //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td index 4cc3844f29120..be4c333836ec0 100644 --- a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td @@ -27,9 +27,4 @@ def FoldTensorSubsetOps : Pass<"fold-tensor-subset-ops"> { ]; } -def TensorBufferize : Pass<"tensor-bufferize", "func::FuncOp"> { - let summary = "Bufferize the `tensor` dialect"; - let constructor = "mlir::tensor::createTensorBufferizePass()"; -} - #endif // MLIR_DIALECT_TENSOR_TRANSFORMS_PASSES diff --git a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h index fbfc56dfe2cf4..1f9522b51a4cf 100644 --- a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h @@ -18,6 +18,7 @@ #include "mlir/Pass/Pass.h" namespace mlir { +class TypeConverter; namespace tosa { #define GEN_PASS_DECL @@ -38,6 +39,8 @@ void populateTosaConstantReduction(MLIRContext *ctx, RewritePatternSet &patterns, bool aggressiveReduceConstant); +void populateTosaTypeConversion(TypeConverter &converter); + std::unique_ptr createTosaLayerwiseConstantFoldPass(); std::unique_ptr 
createTosaLayerwiseConstantFoldPass( const TosaLayerwiseConstantFoldPassOptions &options); diff --git a/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h b/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h index 911402551e14d..5667f4fa95ace 100644 --- a/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h @@ -17,9 +17,6 @@ namespace vector { #define GEN_PASS_DECL #include "mlir/Dialect/Vector/Transforms/Passes.h.inc" -/// Creates an instance of the `vector` dialect bufferization pass. -std::unique_ptr createVectorBufferizePass(); - /// Creates an instance of the `vector.mask` lowering pass. std::unique_ptr createLowerVectorMaskPass(); diff --git a/mlir/include/mlir/Dialect/Vector/Transforms/Passes.td b/mlir/include/mlir/Dialect/Vector/Transforms/Passes.td index 31a0b3b2f0c53..7436998749791 100644 --- a/mlir/include/mlir/Dialect/Vector/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Vector/Transforms/Passes.td @@ -11,11 +11,6 @@ include "mlir/Pass/PassBase.td" -def VectorBufferize : Pass<"vector-bufferize", "func::FuncOp"> { - let summary = "Bufferize Vector dialect ops"; - let constructor = "mlir::vector::createVectorBufferizePass()"; -} - def LowerVectorMaskPass : Pass<"lower-vector-mask", "func::FuncOp"> { let summary = "Lower 'vector.mask' operations"; let constructor = "mlir::vector::createLowerVectorMaskPass()"; diff --git a/mlir/include/mlir/Interfaces/InferIntRangeInterface.h b/mlir/include/mlir/Interfaces/InferIntRangeInterface.h index 05064a72ef02e..0e107e88f5232 100644 --- a/mlir/include/mlir/Interfaces/InferIntRangeInterface.h +++ b/mlir/include/mlir/Interfaces/InferIntRangeInterface.h @@ -105,10 +105,83 @@ class ConstantIntRanges { raw_ostream &operator<<(raw_ostream &, const ConstantIntRanges &); +/// This lattice value represents the integer range of an SSA value. 
+class IntegerValueRange { +public: + /// Create a maximal range ([0, uint_max(t)] / [int_min(t), int_max(t)]) + /// range that is used to mark the value as unable to be analyzed further, + /// where `t` is the type of `value`. + static IntegerValueRange getMaxRange(Value value); + + /// Create an integer value range lattice value. + IntegerValueRange(ConstantIntRanges value) : value(std::move(value)) {} + + /// Create an integer value range lattice value. + IntegerValueRange(std::optional value = std::nullopt) + : value(std::move(value)) {} + + /// Whether the range is uninitialized. This happens when the state hasn't + /// been set during the analysis. + bool isUninitialized() const { return !value.has_value(); } + + /// Get the known integer value range. + const ConstantIntRanges &getValue() const { + assert(!isUninitialized()); + return *value; + } + + /// Compare two ranges. + bool operator==(const IntegerValueRange &rhs) const { + return value == rhs.value; + } + + /// Compute the least upper bound of two ranges. + static IntegerValueRange join(const IntegerValueRange &lhs, + const IntegerValueRange &rhs) { + if (lhs.isUninitialized()) + return rhs; + if (rhs.isUninitialized()) + return lhs; + return IntegerValueRange{lhs.getValue().rangeUnion(rhs.getValue())}; + } + + /// Print the integer value range. + void print(raw_ostream &os) const { os << value; } + +private: + /// The known integer value range. + std::optional value; +}; + +raw_ostream &operator<<(raw_ostream &, const IntegerValueRange &); + /// The type of the `setResultRanges` callback provided to ops implementing /// InferIntRangeInterface. It should be called once for each integer result /// value and be passed the ConstantIntRanges corresponding to that value. -using SetIntRangeFn = function_ref; +using SetIntRangeFn = + llvm::function_ref; + +/// Similar to SetIntRangeFn, but operating on IntegerValueRange lattice values. 
+/// This is the `setResultRanges` callback for the IntegerValueRange based +/// interface method. +using SetIntLatticeFn = + llvm::function_ref; + +class InferIntRangeInterface; + +namespace intrange::detail { +/// Default implementation of `inferResultRanges` which dispatches to the +/// `inferResultRangesFromOptional`. +void defaultInferResultRanges(InferIntRangeInterface interface, + ArrayRef argRanges, + SetIntLatticeFn setResultRanges); + +/// Default implementation of `inferResultRangesFromOptional` which dispatches +/// to the `inferResultRanges`. +void defaultInferResultRangesFromOptional(InferIntRangeInterface interface, + ArrayRef argRanges, + SetIntRangeFn setResultRanges); +} // end namespace intrange::detail } // end namespace mlir #include "mlir/Interfaces/InferIntRangeInterface.h.inc" diff --git a/mlir/include/mlir/Interfaces/InferIntRangeInterface.td b/mlir/include/mlir/Interfaces/InferIntRangeInterface.td index dbdc526c6f10b..6ee436ce4d6c2 100644 --- a/mlir/include/mlir/Interfaces/InferIntRangeInterface.td +++ b/mlir/include/mlir/Interfaces/InferIntRangeInterface.td @@ -28,9 +28,10 @@ def InferIntRangeInterface : OpInterface<"InferIntRangeInterface"> { Infer the bounds on the results of this op given the bounds on its arguments. For each result value or block argument (that isn't a branch argument, since the dataflow analysis handles those case), the method should call - `setValueRange` with that `Value` as an argument. When `setValueRange` - is not called for some value, it will recieve a default value of the mimimum - and maximum values for its type (the unbounded range). + `setValueRange` with that `Value` as an argument. When implemented, + `setValueRange` should be called on all result values for the operation. + When operations take non-integer inputs, the + `inferResultRangesFromOptional` method should be implemented instead. 
When called on an op that also implements the RegionBranchOpInterface or BranchOpInterface, this method should not attempt to infer the values @@ -39,14 +40,39 @@ def InferIntRangeInterface : OpInterface<"InferIntRangeInterface"> { This function will only be called when at least one result of the op is a scalar integer value or the op has a region. + }], + /*retTy=*/"void", + /*methodName=*/"inferResultRanges", + /*args=*/(ins "::llvm::ArrayRef<::mlir::ConstantIntRanges>":$argRanges, + "::mlir::SetIntRangeFn":$setResultRanges), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + ::mlir::intrange::detail::defaultInferResultRangesFromOptional($_op, + argRanges, + setResultRanges); + }]>, + + InterfaceMethod<[{ + Infer the bounds on the results of this op given the lattice representation + of the bounds for its arguments. For each result value or block argument + (that isn't a branch argument, since the dataflow analysis handles + those case), the method should call `setValueRange` with that `Value` + as an argument. When implemented, `setValueRange` should be called on + all result values for the operation. - `argRanges` contains one `IntRangeAttrs` for each argument to the op in ODS - order. Non-integer arguments will have the an unbounded range of width-0 - APInts in their `argRanges` element. + This method allows for more precise implementations when operations + want to reason about inputs which may be undefined during the analysis. 
}], - "void", "inferResultRanges", (ins - "::llvm::ArrayRef<::mlir::ConstantIntRanges>":$argRanges, - "::mlir::SetIntRangeFn":$setResultRanges) - >]; + /*retTy=*/"void", + /*methodName=*/"inferResultRangesFromOptional", + /*args=*/(ins "::llvm::ArrayRef<::mlir::IntegerValueRange>":$argRanges, + "::mlir::SetIntLatticeFn":$setResultRanges), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + ::mlir::intrange::detail::defaultInferResultRanges($_op, + argRanges, + setResultRanges); + }]> + ]; } #endif // MLIR_INTERFACES_INFERINTRANGEINTERFACE diff --git a/mlir/include/mlir/Interfaces/Utils/InferIntRangeCommon.h b/mlir/include/mlir/Interfaces/Utils/InferIntRangeCommon.h index 851bb534bc7ee..3988a8826498a 100644 --- a/mlir/include/mlir/Interfaces/Utils/InferIntRangeCommon.h +++ b/mlir/include/mlir/Interfaces/Utils/InferIntRangeCommon.h @@ -25,7 +25,11 @@ namespace intrange { /// abstracted away here to permit writing the function that handles both /// 64- and 32-bit index types. using InferRangeFn = - function_ref)>; + std::function)>; + +/// Function that performs inferrence on an array of `IntegerValueRange`. +using InferIntegerValueRangeFn = + std::function)>; static constexpr unsigned indexMinWidth = 32; static constexpr unsigned indexMaxWidth = 64; @@ -52,7 +56,7 @@ using InferRangeWithOvfFlagsFn = /// /// The `mode` argument specifies if the unsigned, signed, or both results of /// the inference computation should be used when comparing the results. 
-ConstantIntRanges inferIndexOp(InferRangeFn inferFn, +ConstantIntRanges inferIndexOp(const InferRangeFn &inferFn, ArrayRef argRanges, CmpMode mode); diff --git a/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp b/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp index a82c30717e275..9721620807a0f 100644 --- a/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp +++ b/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp @@ -36,17 +36,6 @@ using namespace mlir; using namespace mlir::dataflow; -IntegerValueRange IntegerValueRange::getMaxRange(Value value) { - unsigned width = ConstantIntRanges::getStorageBitwidth(value.getType()); - if (width == 0) - return {}; - APInt umin = APInt::getMinValue(width); - APInt umax = APInt::getMaxValue(width); - APInt smin = width != 0 ? APInt::getSignedMinValue(width) : umin; - APInt smax = width != 0 ? APInt::getSignedMaxValue(width) : umax; - return IntegerValueRange{ConstantIntRanges{umin, umax, smin, smax}}; -} - void IntegerValueRangeLattice::onUpdate(DataFlowSolver *solver) const { Lattice::onUpdate(solver); @@ -72,24 +61,17 @@ void IntegerValueRangeLattice::onUpdate(DataFlowSolver *solver) const { void IntegerRangeAnalysis::visitOperation( Operation *op, ArrayRef operands, ArrayRef results) { - // If the lattice on any operand is unitialized, bail out. 
- if (llvm::any_of(operands, [](const IntegerValueRangeLattice *lattice) { - return lattice->getValue().isUninitialized(); - })) { - return; - } - auto inferrable = dyn_cast(op); if (!inferrable) return setAllToEntryStates(results); LLVM_DEBUG(llvm::dbgs() << "Inferring ranges for " << *op << "\n"); - SmallVector argRanges( - llvm::map_range(operands, [](const IntegerValueRangeLattice *val) { - return val->getValue().getValue(); - })); + auto argRanges = llvm::map_to_vector( + operands, [](const IntegerValueRangeLattice *lattice) { + return lattice->getValue(); + }); - auto joinCallback = [&](Value v, const ConstantIntRanges &attrs) { + auto joinCallback = [&](Value v, const IntegerValueRange &attrs) { auto result = dyn_cast(v); if (!result) return; @@ -99,7 +81,7 @@ void IntegerRangeAnalysis::visitOperation( IntegerValueRangeLattice *lattice = results[result.getResultNumber()]; IntegerValueRange oldRange = lattice->getValue(); - ChangeResult changed = lattice->join(IntegerValueRange{attrs}); + ChangeResult changed = lattice->join(attrs); // Catch loop results with loop variant bounds and conservatively make // them [-inf, inf] so we don't circle around infinitely often (because @@ -116,7 +98,7 @@ void IntegerRangeAnalysis::visitOperation( propagateIfChanged(lattice, changed); }; - inferrable.inferResultRanges(argRanges, joinCallback); + inferrable.inferResultRangesFromOptional(argRanges, joinCallback); } void IntegerRangeAnalysis::visitNonControlFlowArguments( @@ -124,17 +106,12 @@ void IntegerRangeAnalysis::visitNonControlFlowArguments( ArrayRef argLattices, unsigned firstIndex) { if (auto inferrable = dyn_cast(op)) { LLVM_DEBUG(llvm::dbgs() << "Inferring ranges for " << *op << "\n"); - // If the lattice on any operand is unitialized, bail out. 
- if (llvm::any_of(op->getOperands(), [&](Value value) { - return getLatticeElementFor(op, value)->getValue().isUninitialized(); - })) - return; - SmallVector argRanges( - llvm::map_range(op->getOperands(), [&](Value value) { - return getLatticeElementFor(op, value)->getValue().getValue(); - })); - auto joinCallback = [&](Value v, const ConstantIntRanges &attrs) { + auto argRanges = llvm::map_to_vector(op->getOperands(), [&](Value value) { + return getLatticeElementFor(op, value)->getValue(); + }); + + auto joinCallback = [&](Value v, const IntegerValueRange &attrs) { auto arg = dyn_cast(v); if (!arg) return; @@ -145,7 +122,7 @@ void IntegerRangeAnalysis::visitNonControlFlowArguments( IntegerValueRangeLattice *lattice = argLattices[arg.getArgNumber()]; IntegerValueRange oldRange = lattice->getValue(); - ChangeResult changed = lattice->join(IntegerValueRange{attrs}); + ChangeResult changed = lattice->join(attrs); // Catch loop results with loop variant bounds and conservatively make // them [-inf, inf] so we don't circle around infinitely often (because @@ -162,7 +139,7 @@ void IntegerRangeAnalysis::visitNonControlFlowArguments( propagateIfChanged(lattice, changed); }; - inferrable.inferResultRanges(argRanges, joinCallback); + inferrable.inferResultRangesFromOptional(argRanges, joinCallback); return; } diff --git a/mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp b/mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp index 0be3d76f556de..388794ec122d2 100644 --- a/mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp +++ b/mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp @@ -394,7 +394,9 @@ void mlir::populateArithToEmitCPatterns(TypeConverter &typeConverter, ArithConstantOpConversionPattern, ArithOpConversion, ArithOpConversion, + ArithOpConversion, ArithOpConversion, + ArithOpConversion, ArithOpConversion, IntegerOpConversion, IntegerOpConversion, diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp 
b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index f425b1f59d994..70dcccf0a7307 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -77,9 +77,9 @@ Value getLaneId(ConversionPatternRewriter &rewriter, Location loc, } static constexpr StringLiteral amdgcnDataLayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" - "-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:" - "128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-" - "G1-ni:7:8"; + "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" + "32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:" + "64-S32-A5-G1-ni:7:8:9"; namespace { struct GPULaneIdOpToROCDL : ConvertOpToLLVMPattern { diff --git a/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp b/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp index 89f956a5e7017..c0c015ab34aab 100644 --- a/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp +++ b/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp @@ -224,8 +224,17 @@ class ReshapeConverter : public OpConversionPattern { matchAndRewrite(tosa::ReshapeOp reshape, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const final { auto loc = reshape.getLoc(); - auto resultType = reshape.getResult().getType(); - auto input = reshape.getInput1(); + auto resultType = cast_if_present( + getTypeConverter()->convertType(reshape.getType())); + if (!resultType) { + return rewriter.notifyMatchFailure(reshape.getLoc(), + "could not convert result type"); + } + auto input = dyn_cast>(adaptor.getInput1()); + if (!input) { + return rewriter.notifyMatchFailure(reshape.getLoc(), + "expected input type to be tensor"); + } auto newShape = reshape.getNewShape(); // Infer all intermediate types @@ -288,12 +297,13 @@ class SliceConverter : public OpConversionPattern { } }; -class PadConverter : public OpRewritePattern { +class PadConverter : 
public OpConversionPattern { public: - using OpRewritePattern::OpRewritePattern; + using OpConversionPattern::OpConversionPattern; - LogicalResult matchAndRewrite(tosa::PadOp padOp, - PatternRewriter &rewriter) const final { + LogicalResult + matchAndRewrite(tosa::PadOp padOp, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { auto loc = padOp.getLoc(); auto input = padOp.getInput1(); auto padding = padOp.getPadding(); @@ -428,11 +438,8 @@ struct ConcatConverter : public OpConversionPattern { } // namespace void mlir::tosa::populateTosaToTensorConversionPatterns( - RewritePatternSet *patterns) { - patterns->add< - ConcatConverter, - PadConverter, - ReshapeConverter, - SliceConverter - >(patterns->getContext()); + TypeConverter &converter, RewritePatternSet *patterns) { + patterns + ->add( + converter, patterns->getContext()); } diff --git a/mlir/lib/Conversion/TosaToTensor/TosaToTensorPass.cpp b/mlir/lib/Conversion/TosaToTensor/TosaToTensorPass.cpp index 50dc55667fb94..fa1c2cf7fba98 100644 --- a/mlir/lib/Conversion/TosaToTensor/TosaToTensorPass.cpp +++ b/mlir/lib/Conversion/TosaToTensor/TosaToTensorPass.cpp @@ -42,7 +42,10 @@ struct TosaToTensor : public impl::TosaToTensorBase { target.addLegalDialect(); target.addLegalDialect(); - mlir::tosa::populateTosaToTensorConversionPatterns(&patterns); + TypeConverter converter; + mlir::tosa::populateTosaTypeConversion(converter); + + mlir::tosa::populateTosaToTensorConversionPatterns(converter, &patterns); if (failed(applyPartialConversion(getOperation(), target, std::move(patterns)))) diff --git a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp index a0b50251c6b67..5797c5681a5fd 100644 --- a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp +++ b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp @@ -2467,6 +2467,12 @@ TypedAttr mlir::arith::getIdentityValueAttr(AtomicRMWKind kind, Type resultType, : APFloat::getInf(semantic, /*Negative=*/true); return builder.getFloatAttr(resultType, 
identity); } + case AtomicRMWKind::maxnumf: { + const llvm::fltSemantics &semantic = + llvm::cast(resultType).getFloatSemantics(); + APFloat identity = APFloat::getNaN(semantic, /*Negative=*/true); + return builder.getFloatAttr(resultType, identity); + } case AtomicRMWKind::addf: case AtomicRMWKind::addi: case AtomicRMWKind::maxu: @@ -2489,6 +2495,12 @@ TypedAttr mlir::arith::getIdentityValueAttr(AtomicRMWKind kind, Type resultType, return builder.getFloatAttr(resultType, identity); } + case AtomicRMWKind::minnumf: { + const llvm::fltSemantics &semantic = + llvm::cast(resultType).getFloatSemantics(); + APFloat identity = APFloat::getNaN(semantic, /*Negative=*/false); + return builder.getFloatAttr(resultType, identity); + } case AtomicRMWKind::mins: return builder.getIntegerAttr( resultType, APInt::getSignedMaxValue( @@ -2518,6 +2530,8 @@ std::optional mlir::arith::getNeutralElement(Operation *op) { .Case([](arith::MulFOp op) { return AtomicRMWKind::mulf; }) .Case([](arith::MaximumFOp op) { return AtomicRMWKind::maximumf; }) .Case([](arith::MinimumFOp op) { return AtomicRMWKind::minimumf; }) + .Case([](arith::MaxNumFOp op) { return AtomicRMWKind::maxnumf; }) + .Case([](arith::MinNumFOp op) { return AtomicRMWKind::minnumf; }) // Integer operations. 
.Case([](arith::AddIOp op) { return AtomicRMWKind::addi; }) .Case([](arith::OrIOp op) { return AtomicRMWKind::ori; }) diff --git a/mlir/lib/Dialect/Arith/IR/InferIntRangeInterfaceImpls.cpp b/mlir/lib/Dialect/Arith/IR/InferIntRangeInterfaceImpls.cpp index fbe2ecab8adca..462044417b5fb 100644 --- a/mlir/lib/Dialect/Arith/IR/InferIntRangeInterfaceImpls.cpp +++ b/mlir/lib/Dialect/Arith/IR/InferIntRangeInterfaceImpls.cpp @@ -295,18 +295,24 @@ void arith::CmpIOp::inferResultRanges(ArrayRef argRanges, // SelectOp //===----------------------------------------------------------------------===// -void arith::SelectOp::inferResultRanges(ArrayRef argRanges, - SetIntRangeFn setResultRange) { - std::optional mbCondVal = argRanges[0].getConstantValue(); +void arith::SelectOp::inferResultRangesFromOptional( + ArrayRef argRanges, SetIntLatticeFn setResultRange) { + std::optional mbCondVal = + argRanges[0].isUninitialized() + ? std::nullopt + : argRanges[0].getValue().getConstantValue(); + + const IntegerValueRange &trueCase = argRanges[1]; + const IntegerValueRange &falseCase = argRanges[2]; if (mbCondVal) { if (mbCondVal->isZero()) - setResultRange(getResult(), argRanges[2]); + setResultRange(getResult(), falseCase); else - setResultRange(getResult(), argRanges[1]); + setResultRange(getResult(), trueCase); return; } - setResultRange(getResult(), argRanges[1].rangeUnion(argRanges[2])); + setResultRange(getResult(), IntegerValueRange::join(trueCase, falseCase)); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Arith/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Arith/Transforms/Bufferize.cpp deleted file mode 100644 index 9a066756f429c..0000000000000 --- a/mlir/lib/Dialect/Arith/Transforms/Bufferize.cpp +++ /dev/null @@ -1,67 +0,0 @@ -//===- Bufferize.cpp - Bufferization for Arith ops ---------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/Arith/Transforms/Passes.h" - -#include "mlir/Dialect/Arith/IR/Arith.h" -#include "mlir/Dialect/Arith/Transforms/BufferizableOpInterfaceImpl.h" -#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" -#include "mlir/Dialect/Bufferization/IR/Bufferization.h" -#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" -#include "mlir/Dialect/MemRef/IR/MemRef.h" - -namespace mlir { -namespace arith { -#define GEN_PASS_DEF_ARITHBUFFERIZEPASS -#include "mlir/Dialect/Arith/Transforms/Passes.h.inc" -} // namespace arith -} // namespace mlir - -using namespace mlir; -using namespace bufferization; - -namespace { -/// Pass to bufferize Arith ops. -struct ArithBufferizePass - : public arith::impl::ArithBufferizePassBase { - using ArithBufferizePassBase::ArithBufferizePassBase; - - ArithBufferizePass(uint64_t alignment = 0, bool constantOpOnly = false) - : constantOpOnly(constantOpOnly) { - this->alignment = alignment; - } - - void runOnOperation() override { - BufferizationOptions options = getPartialBufferizationOptions(); - if (constantOpOnly) { - options.opFilter.allowOperation(); - } else { - options.opFilter.allowDialect(); - } - options.bufferAlignment = alignment; - - if (failed(bufferizeOp(getOperation(), options))) - signalPassFailure(); - } - - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - arith::registerBufferizableOpInterfaceExternalModels(registry); - } - -private: - bool constantOpOnly; -}; -} // namespace - -std::unique_ptr -mlir::arith::createConstantBufferizePass(uint64_t alignment) { - return std::make_unique(alignment, - /*constantOpOnly=*/true); -} diff --git a/mlir/lib/Dialect/Arith/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Arith/Transforms/CMakeLists.txt index 
12659eaba1fa5..6b8bde8dc2aaf 100644 --- a/mlir/lib/Dialect/Arith/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Arith/Transforms/CMakeLists.txt @@ -1,7 +1,6 @@ add_mlir_dialect_library(MLIRArithTransforms BufferDeallocationOpInterfaceImpl.cpp BufferizableOpInterfaceImpl.cpp - Bufferize.cpp BufferViewFlowOpInterfaceImpl.cpp EmulateUnsupportedFloats.cpp EmulateWideInt.cpp diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp index 7ba347a1f15e4..0fddd60eb8140 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp @@ -320,29 +320,6 @@ struct OneShotBufferizePass }; } // namespace -namespace { -struct BufferizationBufferizePass - : public bufferization::impl::BufferizationBufferizeBase< - BufferizationBufferizePass> { - void runOnOperation() override { - BufferizationOptions options = getPartialBufferizationOptions(); - options.opFilter.allowDialect(); - - if (failed(bufferizeOp(getOperation(), options))) - signalPassFailure(); - } - - void getDependentDialects(DialectRegistry ®istry) const override { - registry - .insert(); - } -}; -} // namespace - -std::unique_ptr mlir::bufferization::createBufferizationBufferizePass() { - return std::make_unique(); -} - std::unique_ptr mlir::bufferization::createOneShotBufferizePass() { return std::make_unique(); } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index dcf3f3b52a606..60b911948d4a0 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -2555,6 +2555,24 @@ Region *LLVMFuncOp::getCallableRegion() { return &getBody(); } +//===----------------------------------------------------------------------===// +// UndefOp. +//===----------------------------------------------------------------------===// + +/// Fold an undef operation to a dedicated undef attribute. 
+OpFoldResult LLVM::UndefOp::fold(FoldAdaptor) { + return LLVM::UndefAttr::get(getContext()); +} + +//===----------------------------------------------------------------------===// +// PoisonOp. +//===----------------------------------------------------------------------===// + +/// Fold a poison operation to a dedicated poison attribute. +OpFoldResult LLVM::PoisonOp::fold(FoldAdaptor) { + return LLVM::PoisonAttr::get(getContext()); +} + //===----------------------------------------------------------------------===// // ZeroOp. //===----------------------------------------------------------------------===// @@ -2568,6 +2586,15 @@ LogicalResult LLVM::ZeroOp::verify() { return success(); } +/// Fold a zero operation to a builtin zero attribute when possible and fall +/// back to a dedicated zero attribute. +OpFoldResult LLVM::ZeroOp::fold(FoldAdaptor) { + OpFoldResult result = Builder(getContext()).getZeroAttr(getType()); + if (result) + return result; + return LLVM::ZeroAttr::get(getContext()); +} + //===----------------------------------------------------------------------===// // ConstantOp. //===----------------------------------------------------------------------===// @@ -3271,11 +3298,18 @@ LogicalResult LLVMDialect::verifyRegionResultAttribute(Operation *op, Operation *LLVMDialect::materializeConstant(OpBuilder &builder, Attribute value, Type type, Location loc) { - // If this was folded from an llvm.mlir.addressof operation, it should be - // materialized as such. + // If this was folded from an operation other than llvm.mlir.constant, it + // should be materialized as such. Note that an llvm.mlir.zero may fold into + // a builtin zero attribute and thus will materialize as a llvm.mlir.constant. 
if (auto symbol = dyn_cast(value)) if (isa(type)) return builder.create(loc, type, symbol); + if (isa(value)) + return builder.create(loc, type); + if (isa(value)) + return builder.create(loc, type); + if (isa(value)) + return builder.create(loc, type); // Otherwise try materializing it as a regular llvm.mlir.constant op. return LLVM::ConstantOp::materialize(builder, value, type, loc); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp deleted file mode 100644 index 8812ca14ba610..0000000000000 --- a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp +++ /dev/null @@ -1,52 +0,0 @@ -//===- Bufferize.cpp - Bufferization of linalg ops ------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/Linalg/Passes.h" - -#include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/Dialect/Bufferization/IR/Bufferization.h" -#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/Linalg/IR/Linalg.h" -#include "mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h" -#include "mlir/Dialect/Linalg/Transforms/Transforms.h" -#include "mlir/Dialect/Tensor/IR/Tensor.h" -#include "mlir/IR/BuiltinDialect.h" -#include "mlir/IR/Operation.h" -#include "mlir/Pass/Pass.h" - -namespace mlir { -#define GEN_PASS_DEF_LINALGBUFFERIZEPASS -#include "mlir/Dialect/Linalg/Passes.h.inc" -} // namespace mlir - -using namespace mlir; -using namespace bufferization; - -namespace { -/// Converts Linalg operations that work on tensor-type operands or results to -/// work on buffers. 
-struct LinalgBufferizePass - : public impl::LinalgBufferizePassBase { - using impl::LinalgBufferizePassBase< - LinalgBufferizePass>::LinalgBufferizePassBase; - void runOnOperation() override { - BufferizationOptions options = getPartialBufferizationOptions(); - options.opFilter.allowDialect(); - - if (failed(bufferizeOp(getOperation(), options))) - signalPassFailure(); - } - - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - linalg::registerBufferizableOpInterfaceExternalModels(registry); - } -}; -} // namespace diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt index ed9f40089282a..7e3dc56e0acdc 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt @@ -2,7 +2,6 @@ add_mlir_dialect_library(MLIRLinalgTransforms AllInterfaces.cpp BubbleUpExtractSlice.cpp BufferizableOpInterfaceImpl.cpp - Bufferize.cpp ConstantFold.cpp ConvertToDestinationStyle.cpp ConvertConv2DToImg2Col.cpp diff --git a/mlir/lib/Dialect/Linalg/Transforms/ConstantFold.cpp b/mlir/lib/Dialect/Linalg/Transforms/ConstantFold.cpp index 8fffabf11f3fd..2e6079e1402e1 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ConstantFold.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ConstantFold.cpp @@ -23,21 +23,21 @@ using namespace mlir; using namespace mlir::linalg; namespace { -/// Base class for constant folding linalg.generic ops with N inputs, 1 output, -/// and permutation indexing maps. +/// Base class for constant folding linalg structured ops with N inputs, 1 +/// output, and permutation indexing maps. 
/// /// `ConcreteType` should provide methods with signatures /// /// ```c++ -/// bool matchIndexingMaps(GenericOp genericOp) const; -/// RegionComputationFn getRegionComputeFn(GenericOp) const; +/// bool matchIndexingMaps(LinalgOp linalgOp) const; +/// RegionComputationFn getRegionComputeFn(LinalgOp) const; /// ``` /// /// The latter inspects the region and returns the computation inside as a /// functor. The functor will be invoked with constant elements for all inputs /// and should return the corresponding computed constant element for output. template -class FoldConstantBase : public OpRewritePattern { +class FoldConstantBase : public OpInterfaceRewritePattern { public: struct APIntOrFloat { std::optional apInt; @@ -52,25 +52,26 @@ class FoldConstantBase : public OpRewritePattern { FoldConstantBase(MLIRContext *context, const ControlFusionFn &controlFn, PatternBenefit benefit = 1) - : OpRewritePattern(context, benefit), controlFn(controlFn) {} + : OpInterfaceRewritePattern(context, benefit), + controlFn(controlFn) {} - LogicalResult matchAndRewrite(GenericOp genericOp, + LogicalResult matchAndRewrite(LinalgOp linalgOp, PatternRewriter &rewriter) const override { // Mixed and buffer sematics aren't supported. - if (!genericOp.hasPureTensorSemantics()) + if (!linalgOp.hasPureTensorSemantics()) return failure(); // Only support ops generating one output for now. - if (genericOp.getNumDpsInits() != 1) + if (linalgOp.getNumDpsInits() != 1) return failure(); - auto outputType = dyn_cast(genericOp.getResultTypes().front()); + auto outputType = dyn_cast(linalgOp->getResultTypes().front()); // Require the output types to be static given that we are generating // constants. 
if (!outputType || !outputType.hasStaticShape()) return failure(); - if (!llvm::all_of(genericOp.getInputs(), [](Value input) { + if (!llvm::all_of(linalgOp.getDpsInputs(), [](Value input) { return isa(input.getType()); })) return failure(); @@ -80,7 +81,7 @@ class FoldConstantBase : public OpRewritePattern { return cast(value.getType()).getElementType(); }; if (!llvm::all_equal( - llvm::map_range(genericOp->getOperands(), getOperandElementType))) + llvm::map_range(linalgOp->getOperands(), getOperandElementType))) return failure(); // We can only handle the case where we have int/float elements. @@ -93,30 +94,30 @@ class FoldConstantBase : public OpRewritePattern { // entirely in the compiler, without needing to turn all indices into // Values, and then do affine apply on them, and then match back the // constant again. - if (!llvm::all_of(genericOp.getIndexingMapsArray(), + if (!llvm::all_of(linalgOp.getIndexingMapsArray(), [](AffineMap map) { return map.isPermutation(); })) return failure(); - for (OpOperand &operand : genericOp.getDpsInitsMutable()) { - if (genericOp.payloadUsesValueFromOperand(&operand)) + for (OpOperand &operand : linalgOp.getDpsInitsMutable()) { + if (linalgOp.payloadUsesValueFromOperand(&operand)) return failure(); } // Further check the indexing maps are okay for the ConcreteType. - if (!static_cast(this)->matchIndexingMaps(genericOp)) + if (!static_cast(this)->matchIndexingMaps(linalgOp)) return failure(); // Defer to the concrete type to check the region and discover the // computation inside. RegionComputationFn computeFn = - static_cast(this)->getRegionComputeFn(genericOp); + static_cast(this)->getRegionComputeFn(linalgOp); if (!computeFn) return failure(); // All inputs should be constants. 
- int numInputs = genericOp.getNumDpsInputs(); + int numInputs = linalgOp.getNumDpsInputs(); SmallVector inputValues(numInputs); - for (const auto &en : llvm::enumerate(genericOp.getDpsInputOperands())) { + for (const auto &en : llvm::enumerate(linalgOp.getDpsInputOperands())) { if (!matchPattern(en.value()->get(), m_Constant(&inputValues[en.index()]))) return failure(); @@ -124,12 +125,11 @@ class FoldConstantBase : public OpRewritePattern { // Identified this as a potential candidate for folding. Now check the // policy to see whether we are allowed to proceed. - for (OpOperand *operand : genericOp.getDpsInputOperands()) { + for (OpOperand *operand : linalgOp.getDpsInputOperands()) { if (!controlFn(operand)) return failure(); } - auto linalgOp = cast(genericOp.getOperation()); SmallVector loopBounds = linalgOp.computeStaticLoopSizes(); int64_t numElements = outputType.getNumElements(); @@ -155,8 +155,8 @@ class FoldConstantBase : public OpRewritePattern { SmallVector> inputDims; for (int i = 0; i < numInputs; ++i) - inputDims.push_back(getDimPositions(genericOp.getIndexingMapsArray()[i])); - auto outputDims = getDimPositions(genericOp.getIndexingMapsArray().back()); + inputDims.push_back(getDimPositions(linalgOp.getIndexingMapsArray()[i])); + auto outputDims = getDimPositions(linalgOp.getIndexingMapsArray().back()); auto outputShape = outputType.getShape(); // Allocate small vectors for index delinearization. Initial values do not @@ -173,7 +173,7 @@ class FoldConstantBase : public OpRewritePattern { APIntOrFloatArray computeFnInputs; auto inputShapes = llvm::to_vector<4>( - llvm::map_range(genericOp.getInputs(), [](Value value) { + llvm::map_range(linalgOp.getDpsInputs(), [](Value value) { return cast(value.getType()).getShape(); })); @@ -254,7 +254,7 @@ class FoldConstantBase : public OpRewritePattern { isFloat ? 
DenseElementsAttr::get(outputType, fpOutputValues) : DenseElementsAttr::get(outputType, intOutputValues); - rewriter.replaceOpWithNewOp(genericOp, outputAttr); + rewriter.replaceOpWithNewOp(linalgOp, outputAttr); return success(); } @@ -262,18 +262,20 @@ class FoldConstantBase : public OpRewritePattern { ControlFusionFn controlFn; }; -// Folds linalg.generic ops that are actually transposes on constant values. +// Folds linalg.transpose (and linalg.generic ops that are actually transposes) +// on constant values. struct FoldConstantTranspose : public FoldConstantBase { + using FoldConstantBase::FoldConstantBase; - bool matchIndexingMaps(GenericOp genericOp) const { + bool matchIndexingMaps(LinalgOp linalgOp) const { // We should have one input and one output. - return genericOp.getIndexingMapsArray().size() == 2; + return linalgOp.getIndexingMapsArray().size() == 2; } - RegionComputationFn getRegionComputeFn(GenericOp genericOp) const { + RegionComputationFn getRegionComputeFn(LinalgOp linalgOp) const { // Make sure the region only contains a yield op. - Block &body = genericOp.getRegion().front(); + Block &body = linalgOp->getRegion(0).front(); if (!llvm::hasSingleElement(body)) return nullptr; auto yieldOp = dyn_cast(body.getTerminator()); diff --git a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp index 65efa18af18f6..c0829397f1f85 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp @@ -351,7 +351,8 @@ static UnitExtentReplacementInfo dropUnitExtentFromOperandMetadata( auto isUnitDim = [&](unsigned dim) { if (auto dimExpr = dyn_cast(exprs[dim])) { unsigned oldPosition = dimExpr.getPosition(); - return !oldDimsToNewDimsMap.count(oldPosition); + return !oldDimsToNewDimsMap.count(oldPosition) && + (operandShape[dim] == 1); } // Handle the other case where the shape is 1, and is accessed using a // constant 0. 
diff --git a/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp deleted file mode 100644 index 9dadbdbc91eca..0000000000000 --- a/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp +++ /dev/null @@ -1,49 +0,0 @@ -//====----- Bufferize.cpp - Bufferization of shape ops ---------*- C++-*--===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/Shape/Transforms/Passes.h" - -#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" -#include "mlir/Dialect/Bufferization/IR/Bufferization.h" -#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/MemRef/IR/MemRef.h" -#include "mlir/Dialect/Shape/IR/Shape.h" -#include "mlir/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.h" -#include "mlir/Pass/Pass.h" - -namespace mlir { -#define GEN_PASS_DEF_SHAPEBUFFERIZE -#include "mlir/Dialect/Shape/Transforms/Passes.h.inc" -} // namespace mlir - -using namespace mlir; -using namespace bufferization; - -namespace { -struct ShapeBufferizePass - : public impl::ShapeBufferizeBase { - void runOnOperation() override { - BufferizationOptions options = getPartialBufferizationOptions(); - options.opFilter.allowDialect(); - - if (failed(bufferizeOp(getOperation(), options))) - signalPassFailure(); - } - - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - shape::registerBufferizableOpInterfaceExternalModels(registry); - } -}; -} // namespace - -std::unique_ptr> mlir::createShapeBufferizePass() { - return std::make_unique(); -} diff --git a/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt index 
7c9b0d2e5e3a8..a51c6780c2866 100644 --- a/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt @@ -1,6 +1,5 @@ add_mlir_dialect_library(MLIRShapeOpsTransforms BufferizableOpInterfaceImpl.cpp - Bufferize.cpp OutlineShapeComputation.cpp RemoveShapeConstraints.cpp ShapeToShapeLowering.cpp diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp index f57353b5892b5..b42d58634a36c 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp @@ -23,7 +23,6 @@ namespace mlir { #define GEN_PASS_DEF_SPARSEASSEMBLER -#define GEN_PASS_DEF_SPARSEENCODINGPROPAGATION #define GEN_PASS_DEF_SPARSEREINTERPRETMAP #define GEN_PASS_DEF_PRESPARSIFICATIONREWRITE #define GEN_PASS_DEF_SPARSIFICATIONPASS @@ -61,14 +60,6 @@ struct SparseAssembler : public impl::SparseAssemblerBase { } }; -struct SparseEncodingPropagation - : public impl::SparseEncodingPropagationBase { - SparseEncodingPropagation() = default; - SparseEncodingPropagation(const SparseEncodingPropagation &pass) = default; - - void runOnOperation() override {} -}; - struct SparseReinterpretMap : public impl::SparseReinterpretMapBase { SparseReinterpretMap() = default; @@ -407,10 +398,6 @@ std::unique_ptr mlir::createSparseAssembler() { return std::make_unique(); } -std::unique_ptr mlir::createSparseEncodingPropagationPass() { - return std::make_unique(); -} - std::unique_ptr mlir::createSparseReinterpretMapPass() { return std::make_unique(); } diff --git a/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp deleted file mode 100644 index d27c4576a8b7a..0000000000000 --- a/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp +++ /dev/null @@ -1,58 +0,0 @@ -//===- Bufferize.cpp - Bufferization for `tensor` dialect ops -------------===// -// -// Part of the LLVM 
Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements bufferization of `tensor` dialect ops -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" -#include "mlir/Dialect/Arith/IR/Arith.h" -#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" -#include "mlir/Dialect/Bufferization/IR/Bufferization.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/MemRef/IR/MemRef.h" -#include "mlir/Dialect/SCF/IR/SCF.h" -#include "mlir/Dialect/Tensor/IR/Tensor.h" -#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h" -#include "mlir/Dialect/Tensor/Transforms/Passes.h" -#include "mlir/IR/ImplicitLocOpBuilder.h" -#include "mlir/Transforms/DialectConversion.h" - -namespace mlir { -namespace tensor { -#define GEN_PASS_DEF_TENSORBUFFERIZE -#include "mlir/Dialect/Tensor/Transforms/Passes.h.inc" -} // namespace tensor -} // namespace mlir - -using namespace mlir; -using namespace bufferization; - -namespace { -struct TensorBufferizePass - : public tensor::impl::TensorBufferizeBase { - void runOnOperation() override { - BufferizationOptions options = getPartialBufferizationOptions(); - options.opFilter.allowDialect(); - - if (failed(bufferizeOp(getOperation(), options))) - signalPassFailure(); - } - - void getDependentDialects(DialectRegistry ®istry) const override { - registry - .insert(); - tensor::registerBufferizableOpInterfaceExternalModels(registry); - } -}; -} // namespace - -std::unique_ptr mlir::tensor::createTensorBufferizePass() { - return std::make_unique(); -} diff --git a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt index 
0aabdaf667b9d..ce32dea09bb0b 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt @@ -1,6 +1,5 @@ add_mlir_dialect_library(MLIRTensorTransforms BufferizableOpInterfaceImpl.cpp - Bufferize.cpp ConcatOpPatterns.cpp EmptyOpPatterns.cpp ExtractSliceFromReshapeUtils.cpp diff --git a/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt index 0e6510ba1e925..c78a74b874aff 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt @@ -7,6 +7,7 @@ add_mlir_dialect_library(MLIRTosaTransforms TosaLayerwiseConstantFoldPass.cpp TosaMakeBroadcastable.cpp TosaOptionalDecompositions.cpp + TosaTypeConverters.cpp TosaValidation.cpp ADDITIONAL_HEADER_DIRS diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaTypeConverters.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaTypeConverters.cpp new file mode 100644 index 0000000000000..d2650de8cd7f0 --- /dev/null +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaTypeConverters.cpp @@ -0,0 +1,52 @@ + +//===- TosaTypeConverters.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Type converters for lowering TOSA to linalg/arith. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Tosa/Transforms/Passes.h" + +#include "mlir/Transforms/DialectConversion.h" + +using namespace mlir; + +void mlir::tosa::populateTosaTypeConversion(TypeConverter &converter) { + converter.addConversion([&](Type type) -> std::optional { + if (type.isUnsignedInteger()) { + return IntegerType::get(type.getContext(), type.getIntOrFloatBitWidth(), + IntegerType::SignednessSemantics::Signless); + } + return type; + }); + converter.addConversion([&](TensorType type) -> std::optional { + auto converted = converter.convertType(type.getElementType()); + if (!converted) + return {}; + return type.clone(converted); + }); + converter.addSourceMaterialization([&](OpBuilder &builder, Type resultType, + ValueRange inputs, + Location loc) -> std::optional { + if (inputs.size() != 1) + return std::nullopt; + + return builder.create(loc, resultType, inputs) + .getResult(0); + }); + converter.addTargetMaterialization([&](OpBuilder &builder, Type resultType, + ValueRange inputs, + Location loc) -> std::optional { + if (inputs.size() != 1) + return std::nullopt; + + return builder.create(loc, resultType, inputs) + .getResult(0); + }); +} diff --git a/mlir/lib/Dialect/Vector/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Vector/Transforms/Bufferize.cpp deleted file mode 100644 index ee99a99b56109..0000000000000 --- a/mlir/lib/Dialect/Vector/Transforms/Bufferize.cpp +++ /dev/null @@ -1,55 +0,0 @@ -//===- Bufferize.cpp - Bufferization for `vector` dialect ops -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements bufferization of `vector` dialect ops -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" - -#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" -#include "mlir/Dialect/Bufferization/IR/Bufferization.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/MemRef/IR/MemRef.h" -#include "mlir/Dialect/Tensor/IR/Tensor.h" -#include "mlir/Dialect/Vector/IR/VectorOps.h" -#include "mlir/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.h" -#include "mlir/Dialect/Vector/Transforms/Passes.h" - -namespace mlir { -namespace vector { -#define GEN_PASS_DEF_VECTORBUFFERIZE -#include "mlir/Dialect/Vector/Transforms/Passes.h.inc" -} // namespace vector -} // namespace mlir - -using namespace mlir; -using namespace bufferization; - -namespace { -struct VectorBufferizePass - : public vector::impl::VectorBufferizeBase { - void runOnOperation() override { - BufferizationOptions options = getPartialBufferizationOptions(); - options.opFilter.allowDialect(); - - if (failed(bufferizeOp(getOperation(), options))) - signalPassFailure(); - } - - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - vector::registerBufferizableOpInterfaceExternalModels(registry); - } -}; -} // namespace - -std::unique_ptr mlir::vector::createVectorBufferizePass() { - return std::make_unique(); -} diff --git a/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt index c4b6abd3e2361..4dbefdd376a8b 100644 --- a/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt @@ -1,6 +1,5 @@ add_mlir_dialect_library(MLIRVectorTransforms BufferizableOpInterfaceImpl.cpp - Bufferize.cpp 
LowerVectorBroadcast.cpp LowerVectorContract.cpp LowerVectorGather.cpp diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp index 802a64b0805ee..156bf742f6297 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp @@ -44,6 +44,19 @@ static bool isLessThanTargetBitWidth(Operation *op, unsigned targetBitWidth) { return true; } +static bool isLessThanOrEqualTargetBitWidth(Type t, unsigned targetBitWidth) { + VectorType vecType = dyn_cast(t); + // Reject index since getElementTypeBitWidth will abort for Index types. + if (!vecType || vecType.getElementType().isIndex()) + return false; + // There are no dimension to fold if it is a 0-D vector. + if (vecType.getRank() == 0) + return false; + unsigned trailingVecDimBitWidth = + vecType.getShape().back() * vecType.getElementTypeBitWidth(); + return trailingVecDimBitWidth <= targetBitWidth; +} + namespace { struct LinearizeConstant final : OpConversionPattern { using OpConversionPattern::OpConversionPattern; @@ -355,6 +368,88 @@ struct LinearizeVectorExtract final return success(); } +private: + unsigned targetVectorBitWidth; +}; + +/// This pattern converts the InsertOp to a ShuffleOp that works on a +/// linearized vector. +/// Following, +/// vector.insert %source %destination [ position ] +/// is converted to : +/// %source_1d = vector.shape_cast %source +/// %destination_1d = vector.shape_cast %destination +/// %out_1d = vector.shuffle %destination_1d, %source_1d [ shuffle_indices_1d +/// ] %out_nd = vector.shape_cast %out_1d +/// `shuffle_indices_1d` is computed using the position of the original insert. 
+struct LinearizeVectorInsert final + : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + LinearizeVectorInsert( + const TypeConverter &typeConverter, MLIRContext *context, + unsigned targetVectBitWidth = std::numeric_limits::max(), + PatternBenefit benefit = 1) + : OpConversionPattern(typeConverter, context, benefit), + targetVectorBitWidth(targetVectBitWidth) {} + LogicalResult + matchAndRewrite(vector::InsertOp insertOp, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + Type dstTy = getTypeConverter()->convertType(insertOp.getDestVectorType()); + assert(!(insertOp.getDestVectorType().isScalable() || + cast(dstTy).isScalable()) && + "scalable vectors are not supported."); + + if (!isLessThanOrEqualTargetBitWidth(insertOp.getSourceType(), + targetVectorBitWidth)) + return rewriter.notifyMatchFailure( + insertOp, "Can't flatten since targetBitWidth < OpSize"); + + // dynamic position is not supported + if (insertOp.hasDynamicPosition()) + return rewriter.notifyMatchFailure(insertOp, + "dynamic position is not supported."); + auto srcTy = insertOp.getSourceType(); + auto srcAsVec = dyn_cast(srcTy); + uint64_t srcSize = 0; + if (srcAsVec) { + srcSize = srcAsVec.getNumElements(); + } else { + return rewriter.notifyMatchFailure(insertOp, + "scalars are not supported."); + } + + auto dstShape = insertOp.getDestVectorType().getShape(); + const auto dstSize = insertOp.getDestVectorType().getNumElements(); + auto dstSizeForOffsets = dstSize; + + // compute linearized offset + int64_t linearizedOffset = 0; + auto offsetsNd = insertOp.getStaticPosition(); + for (auto [dim, offset] : llvm::enumerate(offsetsNd)) { + dstSizeForOffsets /= dstShape[dim]; + linearizedOffset += offset * dstSizeForOffsets; + } + + llvm::SmallVector indices(dstSize); + auto origValsUntil = indices.begin(); + std::advance(origValsUntil, linearizedOffset); + std::iota(indices.begin(), origValsUntil, + 0); // original values that remain [0, 
offset) + auto newValsUntil = origValsUntil; + std::advance(newValsUntil, srcSize); + std::iota(origValsUntil, newValsUntil, + dstSize); // new values [offset, offset+srcNumElements) + std::iota(newValsUntil, indices.end(), + linearizedOffset + srcSize); // the rest of original values + // [offset+srcNumElements, end) + + rewriter.replaceOpWithNewOp( + insertOp, dstTy, adaptor.getDest(), adaptor.getSource(), + rewriter.getI64ArrayAttr(indices)); + + return success(); + } + private: unsigned targetVectorBitWidth; }; @@ -410,6 +505,6 @@ void mlir::vector::populateVectorLinearizeShuffleLikeOpsPatterns( : true; }); patterns.add( + LinearizeVectorInsert, LinearizeVectorExtractStridedSlice>( typeConverter, patterns.getContext(), targetBitWidth); } diff --git a/mlir/lib/Interfaces/InferIntRangeInterface.cpp b/mlir/lib/Interfaces/InferIntRangeInterface.cpp index b3f6c0ee3cc32..d879b93586899 100644 --- a/mlir/lib/Interfaces/InferIntRangeInterface.cpp +++ b/mlir/lib/Interfaces/InferIntRangeInterface.cpp @@ -126,3 +126,51 @@ raw_ostream &mlir::operator<<(raw_ostream &os, const ConstantIntRanges &range) { return os << "unsigned : [" << range.umin() << ", " << range.umax() << "] signed : [" << range.smin() << ", " << range.smax() << "]"; } + +IntegerValueRange IntegerValueRange::getMaxRange(Value value) { + unsigned width = ConstantIntRanges::getStorageBitwidth(value.getType()); + if (width == 0) + return {}; + + APInt umin = APInt::getMinValue(width); + APInt umax = APInt::getMaxValue(width); + APInt smin = width != 0 ? APInt::getSignedMinValue(width) : umin; + APInt smax = width != 0 ? 
APInt::getSignedMaxValue(width) : umax; + return IntegerValueRange{ConstantIntRanges{umin, umax, smin, smax}}; +} + +raw_ostream &mlir::operator<<(raw_ostream &os, const IntegerValueRange &range) { + range.print(os); + return os; +} + +void mlir::intrange::detail::defaultInferResultRanges( + InferIntRangeInterface interface, ArrayRef argRanges, + SetIntLatticeFn setResultRanges) { + llvm::SmallVector unpacked; + unpacked.reserve(argRanges.size()); + + for (const IntegerValueRange &range : argRanges) { + if (range.isUninitialized()) + return; + unpacked.push_back(range.getValue()); + } + + interface.inferResultRanges( + unpacked, + [&setResultRanges](Value value, const ConstantIntRanges &argRanges) { + setResultRanges(value, IntegerValueRange{argRanges}); + }); +} + +void mlir::intrange::detail::defaultInferResultRangesFromOptional( + InferIntRangeInterface interface, ArrayRef argRanges, + SetIntRangeFn setResultRanges) { + auto ranges = llvm::to_vector_of(argRanges); + interface.inferResultRangesFromOptional( + ranges, + [&setResultRanges](Value value, const IntegerValueRange &argRanges) { + if (!argRanges.isUninitialized()) + setResultRanges(value, argRanges.getValue()); + }); +} diff --git a/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp b/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp index fe1a67d628738..5b8d35e7bd519 100644 --- a/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp +++ b/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp @@ -76,7 +76,7 @@ static ConstantIntRanges minMaxBy(ConstArithFn op, ArrayRef lhs, //===----------------------------------------------------------------------===// ConstantIntRanges -mlir::intrange::inferIndexOp(InferRangeFn inferFn, +mlir::intrange::inferIndexOp(const InferRangeFn &inferFn, ArrayRef argRanges, intrange::CmpMode mode) { ConstantIntRanges sixtyFour = inferFn(argRanges); diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp index 7db7163bac4ab..f19e0f8c4c2a4 100644 --- 
# Grouped 2-D convolution with input/kernel zero-point operands.
# NOTE(review): this appears to be the zero-point counterpart of
# `conv_2d_ngchw_gfchw` defined just above in this file - confirm.
@linalg_structured_op
def conv_2d_ngchw_gfchw_q(
    # Input tensor; the two spatial extents are expressed in terms of the
    # output size, stride, kernel size and dilation symbols.
    I=TensorDef(
        T1, S.N, S.G, S.C, S.OH * S.SH + S.KH * S.DH, S.OW * S.SW + S.KW * S.DW
    ),
    # Kernel tensor indexed by (group, filters-per-group, channel, kh, kw).
    K=TensorDef(T2, S.G, S.FG, S.C, S.KH, S.KW),
    # Scalar zero points subtracted from the input and kernel operands.
    IZp=ScalarDef(I32),
    KZp=ScalarDef(I32),
    # Accumulator / result tensor.
    O=TensorDef(U, S.N, S.G, S.FG, S.OH, S.OW, output=True),
    strides=IndexAttrDef(S.SH, S.SW, default=[1, 1]),
    dilations=IndexAttrDef(S.DH, S.DW, default=[1, 1]),
):
    """Performs 2-D grouped convolution with zero-point offsets.

    Layout:
      * Input: NGCHW.
      * Kernel: GFCHW.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. This includes the zero
    point offsets common to quantized operations.
    """
    implements(ConvolutionOpInterface)
    # Iteration domain: the five output dimensions followed by the reduction
    # dimensions (channel, kernel height, kernel width).
    domain(D.n, D.g, D.fg, D.oh, D.ow, D.c, D.kh, D.kw)
    # Both operands and both zero points are cast to the accumulator type U
    # before the subtraction, so (input - IZp) * (kernel - KZp) is evaluated
    # entirely in U.
    O[D.n, D.g, D.fg, D.oh, D.ow] += (
        TypeFn.cast_signed(
            U, I[D.n, D.g, D.c, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW]
        )
        - TypeFn.cast_signed(U, IZp)
    ) * (
        TypeFn.cast_signed(U, K[D.g, D.fg, D.c, D.kh, D.kw])
        - TypeFn.cast_signed(U, KZp)
    )
100644 --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -2,7 +2,8 @@ // RUN: mlir-opt %s -convert-gpu-to-rocdl='index-bitwidth=32' -split-input-file | FileCheck --check-prefix=CHECK32 %s // CHECK-LABEL: @test_module -// CHECK-SAME: llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +// CHECK-SAME: llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" + gpu.module @test_module { // CHECK-LABEL: func @gpu_index_ops() // CHECK32-LABEL: func @gpu_index_ops() diff --git a/mlir/test/Conversion/TosaToTensor/tosa-to-tensor.mlir b/mlir/test/Conversion/TosaToTensor/tosa-to-tensor.mlir index 72e7e4cc84088..1e62e25176a00 100644 --- a/mlir/test/Conversion/TosaToTensor/tosa-to-tensor.mlir +++ b/mlir/test/Conversion/TosaToTensor/tosa-to-tensor.mlir @@ -420,6 +420,20 @@ func.func @test_reshape_6d_down_s2s_explicit(%arg0: tensor<1x2x3x5x7x11xf32>) -> // ----- +// CHECK-LABEL: @test_reshape_samerank_unsigned +// CHECK-SAME: (%[[ARG0:.*]]: tensor<3x2xui8>) +func.func @test_reshape_samerank_unsigned(%arg0: tensor<3x2xui8>) -> tensor<2x3xui8> { + // CHECK-NEXT: %[[CAST1:.*]] = builtin.unrealized_conversion_cast %[[ARG0]] : tensor<3x2xui8> to tensor<3x2xi8> + // CHECK-NEXT: %[[RESHAPE1:.*]] = tensor.collapse_shape %[[CAST1]] {{\[}}[0, 1]] : tensor<3x2xi8> into tensor<6xi8> + // CHECK-NEXT: %[[RESHAPE2:.*]] = tensor.expand_shape %[[RESHAPE1]] {{\[}}[0, 1]] output_shape {{\[}}2, 3] : tensor<6xi8> into tensor<2x3xi8> + // CHECK-NEXT: %[[CAST2:.*]] = builtin.unrealized_conversion_cast %[[RESHAPE2]] : tensor<2x3xi8> to tensor<2x3xui8 + %0 = 
"tosa.reshape"(%arg0) {new_shape = array} : (tensor<3x2xui8>) -> tensor<2x3xui8> + // CHECK-NEXT: return %[[CAST2]] + return %0 : tensor<2x3xui8> +} + +// ----- + // CHECK-LABEL: func @slice func.func @slice(%arg0: tensor<6xf32>) ->() { // CHECK: [[SLICE:%.+]] = tensor.extract_slice %arg0[2] [1] [1] diff --git a/mlir/test/Dialect/Arith/bufferize.mlir b/mlir/test/Dialect/Arith/bufferize.mlir index 944954e9e4edd..a3b1454fb68f6 100644 --- a/mlir/test/Dialect/Arith/bufferize.mlir +++ b/mlir/test/Dialect/Arith/bufferize.mlir @@ -1,5 +1,4 @@ -// RUN: mlir-opt %s -arith-bufferize -split-input-file -verify-diagnostics | FileCheck %s -// RUN: mlir-opt %s -arith-bufferize=alignment=64 -split-input-file -verify-diagnostics | FileCheck --check-prefix=ALIGNED %s +// RUN: mlir-opt %s --one-shot-bufferize="dialect-filter=arith,bufferization copy-before-write unknown-type-conversion=identity-layout-map" -split-input-file -verify-diagnostics | FileCheck %s // CHECK-LABEL: func @index_cast( // CHECK-SAME: %[[TENSOR:.*]]: tensor, %[[SCALAR:.*]]: i32 @@ -22,10 +21,7 @@ func.func @index_cast(%tensor: tensor, %scalar: i32) -> (tensor, ind // The name isn't load-bearing though. // CHECK: memref.global "private" constant @__constant_3x4xf32 : memref<3x4xf32> = dense<7.000000e+00> -// CHECK-NOT: alignment - -// ALIGNED: memref.global "private" constant @__constant_3x4xf32 : memref<3x4xf32> = dense<7.000000e+00> -// ALIGNED-SAME: {alignment = 64 : i64} +// CHECK-SAME: {alignment = 64 : i64} // CHECK: @basic func.func @basic() -> tensor<3x4xf32> { diff --git a/mlir/test/Dialect/Arith/canonicalize.mlir b/mlir/test/Dialect/Arith/canonicalize.mlir index 1a387c20c4b29..e4f95bb0545a2 100644 --- a/mlir/test/Dialect/Arith/canonicalize.mlir +++ b/mlir/test/Dialect/Arith/canonicalize.mlir @@ -2950,14 +2950,6 @@ func.func @unsignedExtendConstantResource() -> tensor { return %ext : tensor } -// Just checks that this doesn't crash. 
-// CHECK-LABEL: @signedExtendSplatAsDynamicShape -func.func @signedExtendSplatAsDynamicShape() -> tensor { - %splat = arith.constant dense<5> : tensor<2xi16> - %extsplat = arith.extsi %splat : tensor<2xi16> to tensor - return %extsplat : tensor -} - // CHECK-LABEL: @extsi_i0 // CHECK: %[[ZERO:.*]] = arith.constant 0 : i16 // CHECK: return %[[ZERO]] : i16 diff --git a/mlir/test/Dialect/Arith/int-range-interface.mlir b/mlir/test/Dialect/Arith/int-range-interface.mlir index 5b538197a0c11..60f0ab41afa48 100644 --- a/mlir/test/Dialect/Arith/int-range-interface.mlir +++ b/mlir/test/Dialect/Arith/int-range-interface.mlir @@ -899,3 +899,22 @@ func.func @test_shl_i8_nowrap() -> i8 { %2 = test.reflect_bounds %1 : i8 return %2: i8 } + +/// A test case to ensure that the ranges for unsupported ops are initialized +/// properly to maxRange, rather than left uninitialized. +/// In this test case, the previous behavior would leave the ranges for %a and +/// %b uninitialized, resulting in arith.cmpf's range not being updated, even +/// though it has an integer valued result. 
+ +// CHECK-LABEL: func @test_cmpf_propagates +// CHECK: test.reflect_bounds {smax = 2 : index, smin = 1 : index, umax = 2 : index, umin = 1 : index} +func.func @test_cmpf_propagates(%a: f32, %b: f32) -> index { + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + + %0 = arith.cmpf ueq, %a, %b : f32 + %1 = arith.select %0, %c1, %c2 : index + %2 = test.reflect_bounds %1 : index + func.return %2 : index +} + diff --git a/mlir/test/Dialect/Arith/invalid.mlir b/mlir/test/Dialect/Arith/invalid.mlir index ada849220bb83..652aa738ad392 100644 --- a/mlir/test/Dialect/Arith/invalid.mlir +++ b/mlir/test/Dialect/Arith/invalid.mlir @@ -1,13 +1,21 @@ // RUN: mlir-opt -split-input-file %s -verify-diagnostics func.func @test_index_cast_shape_error(%arg0 : tensor) -> tensor<2xi64> { - // expected-error @+1 {{'arith.index_cast' op requires the same shape for all operands and results}} + // expected-error @+1 {{'arith.index_cast' op failed to verify that input and output have the same tensor dimensions}} %0 = arith.index_cast %arg0 : tensor to tensor<2xi64> return %0 : tensor<2xi64> } // ----- +func.func @test_index_cast_shape_dim_error(%arg0 : tensor<2xindex>) -> tensor { + // expected-error @+1 {{'arith.index_cast' op failed to verify that input and output have the same tensor dimensions}} + %0 = arith.index_cast %arg0 : tensor<2xindex> to tensor + return %0 : tensor +} + +// ----- + func.func @test_index_cast_tensor_error(%arg0 : tensor) -> i64 { // expected-error @+1 {{'arith.index_cast' op requires the same shape for all operands and results}} %0 = arith.index_cast %arg0 : tensor to i64 @@ -655,6 +663,14 @@ func.func @extsi_scalable_to_fl(%arg0 : vector<[4]xi32>) { // ----- +func.func @extsi_tensor_dim(%arg0 : tensor<4xi32>) { + // expected-error@+1 {{'arith.extsi' op failed to verify that input and output have the same tensor dimensions}} + %0 = arith.extsi %arg0 : tensor<4xi32> to tensor + return +} + +// ----- + func.func @extf_scalable_to_fl(%arg0 : 
vector<[4]xf32>) { // expected-error@+1 {{'arith.extf' op requires the same shape for all operands and results}} %0 = arith.extf %arg0 : vector<[4]xf32> to vector<4xf64> @@ -703,6 +719,22 @@ func.func @bitcast_scalable_to_fl(%arg0 : vector<[4]xf32>) { // ----- +func.func @bitcast_tensor_dim(%arg0 : tensor<4xf32>) { + // expected-error@+1 {{'arith.bitcast' op failed to verify that input and output have the same tensor dimensions}} + %0 = arith.bitcast %arg0 : tensor<4xf32> to tensor + return +} + +// ----- + +func.func @bitcast_tensor_dim(%arg0 : tensor) { + // expected-error@+1 {{'arith.bitcast' op failed to verify that input and output have the same tensor dimensions}} + %0 = arith.bitcast %arg0 : tensor to tensor<4xi32> + return +} + +// ----- + func.func @trunci_fl_to_scalable(%arg0 : vector<4xi32>) { // expected-error@+1 {{'arith.trunci' op requires the same shape for all operands and results}} %0 = arith.trunci %arg0 : vector<4xi32> to vector<[4]xi8> @@ -719,6 +751,14 @@ func.func @truncf_fl_to_scalable(%arg0 : vector<4xf64>) { // ----- +func.func @truncf_tensor_dim(%arg0 : tensor<4xf64>) { + // expected-error@+1 {{'arith.truncf' op failed to verify that input and output have the same tensor dimensions}} + %0 = arith.truncf %arg0 : tensor<4xf64> to tensor + return +} + +// ----- + func.func @extui_fl_to_scalable(%arg0 : vector<4xi32>) { // expected-error@+1 {{'arith.extui' op requires the same shape for all operands and results}} %0 = arith.extui %arg0 : vector<4xi32> to vector<[4]xi64> diff --git a/mlir/test/Dialect/GPU/dynamic-shared-memory.mlir b/mlir/test/Dialect/GPU/dynamic-shared-memory.mlir index fb45faaa712f7..d73125fd763e6 100644 --- a/mlir/test/Dialect/GPU/dynamic-shared-memory.mlir +++ b/mlir/test/Dialect/GPU/dynamic-shared-memory.mlir @@ -3,11 +3,11 @@ gpu.module @modules { // CHECK: llvm.mlir.global internal @__dynamic_shmem__3() {addr_space = 3 : i32, alignment = 16 : i64} : !llvm.array<0 x i8> llvm.mlir.global internal @__dynamic_shmem__0() 
{addr_space = 3 : i32, alignment = 4 : i64} : !llvm.array<0 x i8> - llvm.mlir.global internal @__dynamic_shmem__1() {addr_space = 3 : i32, alignment = 4 : i64} : !llvm.array<0 x i8> - llvm.mlir.global internal @__dynamic_shmem__2() {alignment = 16 : i64} : !llvm.array<0 x i8> + llvm.mlir.global internal @__dynamic_shmem__1() {addr_space = 3 : i32, alignment = 4 : i64} : !llvm.array<0 x i8> + llvm.mlir.global internal @__dynamic_shmem__2() {alignment = 16 : i64} : !llvm.array<0 x i8> // CHECK-LABEL: llvm.func @dynamic_shared_memory_kernel( // CHECK-SAME: %[[arg0:.+]]: i64) - gpu.func @dynamic_shared_memory_kernel(%d : index) kernel attributes {gpu.known_block_size = array, gpu.known_grid_size = array} { + gpu.func @dynamic_shared_memory_kernel(%d : index) kernel attributes {gpu.known_block_size = array, gpu.known_grid_size = array} { %c1 = arith.constant 1 : index %c8192 = arith.constant 8192 : index %c16384 = arith.constant 16384 : index @@ -19,83 +19,83 @@ gpu.module @modules { %1 = memref.view %shmem[%c16384][] : memref> to memref<32x64xf32, #gpu.address_space> "test.use.shared.memory"(%1) : (memref<32x64xf32, #gpu.address_space>) -> () - -// CHECK: %[[S0:.+]] = llvm.mlir.constant(32 : index) : i64 -// CHECK: %[[S1:.+]] = llvm.mlir.constant(64 : index) : i64 -// CHECK: %[[S2:.+]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: %[[S3:.+]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[S4:.+]] = llvm.mlir.addressof @__dynamic_shmem__3 : !llvm.ptr<3> -// CHECK: %[[S5:.+]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S6:.+]] = llvm.insertvalue %[[S4]], %[[S5]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S7:.+]] = llvm.getelementptr %[[S4]][8192] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8 -// CHECK: %[[S8:.+]] = llvm.insertvalue %[[S7]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S9:.+]] = llvm.insertvalue %[[S3]], 
%[[S8]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S10:.+]] = llvm.insertvalue %[[S1]], %[[S9]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S11:.+]] = llvm.insertvalue %[[S2]], %[[S10]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S12:.+]] = llvm.insertvalue %[[S0]], %[[S11]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S13:.+]] = llvm.insertvalue %[[S1]], %[[S12]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S14:.+]] = builtin.unrealized_conversion_cast %[[S13]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space> -// CHECK: "test.use.shared.memory"(%[[S14]]) : (memref<32x64xf32, #gpu.address_space>) -> () -// CHECK: %[[S15:.+]] = llvm.getelementptr %4[16384] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8 -// CHECK: %[[S16:.+]] = llvm.insertvalue %[[S15]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S17:.+]] = llvm.insertvalue %[[S3]], %[[S16]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S18:.+]] = llvm.insertvalue %[[S1]], %[[S17]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S19:.+]] = llvm.insertvalue %[[S2]], %[[S18]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S20:.+]] = llvm.insertvalue %[[S0]], %[[S19]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S21:.+]] = llvm.insertvalue %[[S1]], %[[S20]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S22:.+]] = builtin.unrealized_conversion_cast %[[S21]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space> -// CHECK: 
"test.use.shared.memory"(%[[S22]]) : (memref<32x64xf32, #gpu.address_space>) -> () + +// CHECK-DAG: %[[S0:.+]] = llvm.mlir.constant(32 : index) : i64 +// CHECK-DAG: %[[S1:.+]] = llvm.mlir.constant(64 : index) : i64 +// CHECK-DAG: %[[S2:.+]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK-DAG: %[[S3:.+]] = llvm.mlir.constant(1 : index) : i64 +// CHECK-DAG: %[[S4:.+]] = llvm.mlir.constant(0 : index) : i64 +// CHECK-DAG: %[[S5:.+]] = llvm.mlir.addressof @__dynamic_shmem__3 : !llvm.ptr<3> +// CHECK: %[[S6:.+]] = llvm.insertvalue %[[S5]], %[[S2]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S7:.+]] = llvm.getelementptr %[[S5]][8192] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8 +// CHECK: %[[S8:.+]] = llvm.insertvalue %[[S7]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S9:.+]] = llvm.insertvalue %[[S4]], %[[S8]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S10:.+]] = llvm.insertvalue %[[S1]], %[[S9]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S11:.+]] = llvm.insertvalue %[[S3]], %[[S10]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S12:.+]] = llvm.insertvalue %[[S0]], %[[S11]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S13:.+]] = llvm.insertvalue %[[S1]], %[[S12]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S14:.+]] = builtin.unrealized_conversion_cast %[[S13]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space> +// CHECK: "test.use.shared.memory"(%[[S14]]) : (memref<32x64xf32, #gpu.address_space>) -> () +// CHECK: %[[S15:.+]] = llvm.getelementptr %[[S5]][16384] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8 +// CHECK: %[[S16:.+]] = llvm.insertvalue %[[S15]], 
%[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S17:.+]] = llvm.insertvalue %[[S4]], %[[S16]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S18:.+]] = llvm.insertvalue %[[S1]], %[[S17]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S19:.+]] = llvm.insertvalue %[[S3]], %[[S18]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S20:.+]] = llvm.insertvalue %[[S0]], %[[S19]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S21:.+]] = llvm.insertvalue %[[S1]], %[[S20]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S22:.+]] = builtin.unrealized_conversion_cast %[[S21]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space> +// CHECK: "test.use.shared.memory"(%[[S22]]) : (memref<32x64xf32, #gpu.address_space>) -> () gpu.return } // CHECK-LABEL: llvm.func @gpu_device_function - gpu.func @gpu_device_function() { + gpu.func @gpu_device_function() { %c8192 = arith.constant 8192 : index %shmem = gpu.dynamic_shared_memory : memref> %0 = memref.view %shmem[%c8192][] : memref> to memref<32x64xf32, #gpu.address_space> "test.use.shared.memory"(%0) : (memref<32x64xf32, #gpu.address_space>) -> () -// CHECK: %[[S0:.+]] = llvm.mlir.constant(32 : index) : i64 -// CHECK: %[[S1:.+]] = llvm.mlir.constant(64 : index) : i64 -// CHECK: %[[S2:.+]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: %[[S3:.+]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[S4:.+]] = llvm.mlir.addressof @__dynamic_shmem__3 : !llvm.ptr<3> -// CHECK: %[[S5:.+]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S6:.+]] = llvm.insertvalue %[[S4]], %[[S5]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S7:.+]] = 
llvm.getelementptr %[[S4]][8192] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8 -// CHECK: %[[S8:.+]] = llvm.insertvalue %[[S7]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S9:.+]] = llvm.insertvalue %[[S3]], %[[S8]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S10:.+]] = llvm.insertvalue %[[S1]], %[[S9]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S11:.+]] = llvm.insertvalue %[[S2]], %[[S10]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S12:.+]] = llvm.insertvalue %[[S0]], %[[S11]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S13:.+]] = llvm.insertvalue %[[S1]], %[[S12]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S14:.+]] = builtin.unrealized_conversion_cast %13 : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space> -// CHECK: "test.use.shared.memory"(%[[S14]]) : (memref<32x64xf32, #gpu.address_space>) -> () +// CHECK-DAG: %[[S0:.+]] = llvm.mlir.constant(32 : index) : i64 +// CHECK-DAG: %[[S1:.+]] = llvm.mlir.constant(64 : index) : i64 +// CHECK-DAG: %[[S2:.+]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK-DAG: %[[S3:.+]] = llvm.mlir.constant(1 : index) : i64 +// CHECK-DAG: %[[S4:.+]] = llvm.mlir.constant(0 : index) : i64 +// CHECK-DAG: %[[S5:.+]] = llvm.mlir.addressof @__dynamic_shmem__3 : !llvm.ptr<3> +// CHECK: %[[S6:.+]] = llvm.insertvalue %[[S5]], %[[S2]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S7:.+]] = llvm.getelementptr %[[S5]][8192] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8 +// CHECK: %[[S8:.+]] = llvm.insertvalue %[[S7]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S9:.+]] = llvm.insertvalue 
%[[S4]], %[[S8]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S10:.+]] = llvm.insertvalue %[[S1]], %[[S9]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S11:.+]] = llvm.insertvalue %[[S3]], %[[S10]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S12:.+]] = llvm.insertvalue %[[S0]], %[[S11]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S13:.+]] = llvm.insertvalue %[[S1]], %[[S12]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S14:.+]] = builtin.unrealized_conversion_cast %13 : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space> +// CHECK: "test.use.shared.memory"(%[[S14]]) : (memref<32x64xf32, #gpu.address_space>) -> () gpu.return } // CHECK-LABEL: llvm.func @func_device_function - func.func @func_device_function() { + func.func @func_device_function() { %c8192 = arith.constant 8192 : index %shmem = gpu.dynamic_shared_memory : memref> %0 = memref.view %shmem[%c8192][] : memref> to memref<32x64xf32, #gpu.address_space> "test.use.shared.memory"(%0) : (memref<32x64xf32, #gpu.address_space>) -> () -// CHECK: %[[S0:.+]] = llvm.mlir.constant(32 : index) : i64 -// CHECK: %[[S1:.+]] = llvm.mlir.constant(64 : index) : i64 -// CHECK: %[[S2:.+]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: %[[S3:.+]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[S4:.+]] = llvm.mlir.addressof @__dynamic_shmem__3 : !llvm.ptr<3> -// CHECK: %[[S5:.+]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S6:.+]] = llvm.insertvalue %[[S4]], %[[S5]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S7:.+]] = llvm.getelementptr %[[S4]][8192] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8 -// CHECK: %[[S8:.+]] = llvm.insertvalue %[[S7]], %[[S6]][1] 
: !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S9:.+]] = llvm.insertvalue %[[S3]], %[[S8]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S10:.+]] = llvm.insertvalue %[[S1]], %[[S9]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S11:.+]] = llvm.insertvalue %[[S2]], %[[S10]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S12:.+]] = llvm.insertvalue %[[S0]], %[[S11]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S13:.+]] = llvm.insertvalue %[[S1]], %[[S12]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[S14:.+]] = builtin.unrealized_conversion_cast %13 : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space> -// CHECK: "test.use.shared.memory"(%[[S14]]) : (memref<32x64xf32, #gpu.address_space>) -> () +// CHECK-DAG: %[[S0:.+]] = llvm.mlir.constant(32 : index) : i64 +// CHECK-DAG: %[[S1:.+]] = llvm.mlir.constant(64 : index) : i64 +// CHECK-DAG: %[[S2:.+]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK-DAG: %[[S3:.+]] = llvm.mlir.constant(1 : index) : i64 +// CHECK-DAG: %[[S4:.+]] = llvm.mlir.constant(0 : index) : i64 +// CHECK-DAG: %[[S5:.+]] = llvm.mlir.addressof @__dynamic_shmem__3 : !llvm.ptr<3> +// CHECK: %[[S6:.+]] = llvm.insertvalue %[[S5]], %[[S2]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S7:.+]] = llvm.getelementptr %[[S5]][8192] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8 +// CHECK: %[[S8:.+]] = llvm.insertvalue %[[S7]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S9:.+]] = llvm.insertvalue %[[S4]], %[[S8]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S10:.+]] = llvm.insertvalue 
%[[S1]], %[[S9]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S11:.+]] = llvm.insertvalue %[[S3]], %[[S10]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S12:.+]] = llvm.insertvalue %[[S0]], %[[S11]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S13:.+]] = llvm.insertvalue %[[S1]], %[[S12]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[S14:.+]] = builtin.unrealized_conversion_cast %13 : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space> +// CHECK: "test.use.shared.memory"(%[[S14]]) : (memref<32x64xf32, #gpu.address_space>) -> () func.return } diff --git a/mlir/test/Dialect/LLVMIR/constant-folding.mlir b/mlir/test/Dialect/LLVMIR/constant-folding.mlir index 454126321eb97..497d679a12a09 100644 --- a/mlir/test/Dialect/LLVMIR/constant-folding.mlir +++ b/mlir/test/Dialect/LLVMIR/constant-folding.mlir @@ -101,3 +101,71 @@ llvm.func @addressof_blocks(%arg: i1) -> !llvm.ptr { } llvm.mlir.global constant @foo() : i32 + +// ----- + +// CHECK-LABEL: llvm.func @undef +llvm.func @undef() { + // CHECK-NEXT: %[[UNDEF:.+]] = llvm.mlir.undef : i32 + %undef1 = llvm.mlir.undef : i32 + %undef2 = llvm.mlir.undef : i32 + // CHECK-NEXT: llvm.call @foo(%[[UNDEF]], %[[UNDEF]]) + llvm.call @foo(%undef1, %undef2) : (i32, i32) -> () + // CHECK-NEXT: llvm.return + llvm.return +} + +llvm.func @foo(i32, i32) + +// ----- + +// CHECK-LABEL: llvm.func @poison +llvm.func @poison() { + // CHECK-NEXT: %[[POISON:.+]] = llvm.mlir.poison : i32 + %poison1 = llvm.mlir.poison : i32 + %poison2 = llvm.mlir.poison : i32 + // CHECK-NEXT: llvm.call @foo(%[[POISON]], %[[POISON]]) + llvm.call @foo(%poison1, %poison2) : (i32, i32) -> () + // CHECK-NEXT: llvm.return + llvm.return +} + +llvm.func @foo(i32, i32) + +// ----- + +llvm.func @foo(!llvm.ptr, !llvm.ptr) + +// CHECK-LABEL: 
llvm.func @null_pointer +llvm.func @null_pointer() { + // CHECK-NEXT: %[[NULLPTR:.+]] = llvm.mlir.zero : !llvm.ptr + %nullptr1 = llvm.mlir.zero : !llvm.ptr + %nullptr2 = llvm.mlir.zero : !llvm.ptr + // CHECK-NEXT: llvm.call @foo(%[[NULLPTR]], %[[NULLPTR]]) + llvm.call @foo(%nullptr1, %nullptr2) : (!llvm.ptr, !llvm.ptr) -> () + // CHECK-NEXT: llvm.return + llvm.return +} + +// ----- + +// CHECK-LABEL: llvm.func @zero_integer +llvm.func @zero_integer() -> i64 { + // CHECK-NEXT: %[[ZERO:.+]] = llvm.mlir.constant(0 : i64) : i64 + %zero = llvm.mlir.zero : i32 + %zero_extended = llvm.zext %zero : i32 to i64 + // CHECK-NEXT: llvm.return %[[ZERO]] + llvm.return %zero_extended : i64 +} + +// ----- + +// CHECK-LABEL: llvm.func @null_pointer_select +llvm.func @null_pointer_select(%cond: i1) -> !llvm.ptr { + // CHECK-NEXT: %[[NULLPTR:.+]] = llvm.mlir.zero : !llvm.ptr + %nullptr1 = llvm.mlir.zero : !llvm.ptr + %nullptr2 = llvm.mlir.zero : !llvm.ptr + %result = arith.select %cond, %nullptr1, %nullptr2 : !llvm.ptr + // CHECK-NEXT: llvm.return %[[NULLPTR]] + llvm.return %result : !llvm.ptr +} diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir index 29f27e6838e66..e8ab1184b1fd2 100644 --- a/mlir/test/Dialect/Linalg/bufferize.mlir +++ b/mlir/test/Dialect/Linalg/bufferize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -linalg-bufferize -canonicalize -cse -split-input-file %s | FileCheck %s +// RUN: mlir-opt --one-shot-bufferize="dialect-filter=linalg,bufferization copy-before-write unknown-type-conversion=identity-layout-map" -canonicalize -cse -split-input-file %s | FileCheck %s #map0 = affine_map<(d0) -> (d0)> @@ -189,31 +189,3 @@ func.func @bufferize_dot(%in: tensor<4xf32>, %out: tensor) -> tensor { // CHECK: %[[OUT_TENSOR:.*]] = bufferization.to_tensor %[[ALLOC]] : memref // CHECK: return %[[OUT_TENSOR]] } - -// ----- - -// This is a regression test. The linalg-bufferize pass should ignore all func -// dialect ops. 
- -// CHECK-LABEL: func private @csum(tensor<6xi64>) -> tensor<6xi64> -func.func private @csum(%arg0: tensor<6xi64>) -> tensor<6xi64> - -// CHECK: func public @main(%[[arg0:.*]]: tensor<2x3xi1>) -// CHECK: %[[collapse:.*]] = tensor.collapse_shape %[[arg0]] -// CHECK: %[[collapse_m:.*]] = bufferization.to_memref %[[collapse]] -// CHECK: %[[alloc:.*]] = memref.alloc() -// CHECK: linalg.generic {{.*}} ins(%[[collapse_m]] : memref<6xi1>) outs(%[[alloc]] : memref<6xi64>) -// CHECK: %[[generic_t:.*]] = bufferization.to_tensor %[[alloc]] -// CHECK: %[[call:.*]] = call @csum(%[[generic_t]]) -// CHECK: return %[[call]] -func.func public @main(%arg0: tensor<2x3xi1>) -> tensor<6xi64> { - %0 = tensor.collapse_shape %arg0 [[0, 1]] : tensor<2x3xi1> into tensor<6xi1> - %1 = tensor.empty() : tensor<6xi64> - %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<6xi1>) outs(%1 : tensor<6xi64>) { - ^bb0(%arg1: i1, %arg2: i64): - %4 = arith.extui %arg1 : i1 to i64 - linalg.yield %4 : i64 - } -> tensor<6xi64> - %3 = func.call @csum(%2) : (tensor<6xi64>) -> tensor<6xi64> - return %3 : tensor<6xi64> -} diff --git a/mlir/test/Dialect/Linalg/constant-fold.mlir b/mlir/test/Dialect/Linalg/constant-fold.mlir new file mode 100644 index 0000000000000..3929c26a3382f --- /dev/null +++ b/mlir/test/Dialect/Linalg/constant-fold.mlir @@ -0,0 +1,148 @@ +// RUN: mlir-opt %s -linalg-fuse-elementwise-ops -split-input-file | FileCheck %s + +// CHECK-LABEL: @transpose_fold_2d_fp32 +func.func @transpose_fold_2d_fp32(%init: tensor<3x2xf32>) -> tensor<3x2xf32> { + %input = arith.constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf32> + // CHECK: %[[CST:.+]] = arith.constant + // CHECK-SAME{LITERAL}: dense<[[0.000000e+00, 3.000000e+00], [1.000000e+00, 4.000000e+00], [2.000000e+00, 5.000000e+00]]> : tensor<3x2xf32> + %1 = linalg.generic { + indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> 
(d0, d1)>], + iterator_types = ["parallel", "parallel"] + } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + ^bb0(%arg1: f32, %arg2: f32): + linalg.yield %arg1 : f32 + } -> tensor<3x2xf32> + // CHECK: return %[[CST]] + return %1 : tensor<3x2xf32> +} + +// ----- + +// CHECK-LABEL: @transpose_fold_2d_fp64 +func.func @transpose_fold_2d_fp64(%init: tensor<3x2xf64>) -> tensor<3x2xf64> { + %input = arith.constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf64> + // CHECK: %[[CST:.+]] = arith.constant + // CHECK-SAME{LITERAL}: dense<[[0.000000e+00, 3.000000e+00], [1.000000e+00, 4.000000e+00], [2.000000e+00, 5.000000e+00]]> : tensor<3x2xf64> + %1 = linalg.generic { + indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], + iterator_types = ["parallel", "parallel"] + } ins(%input : tensor<2x3xf64>) outs(%init : tensor<3x2xf64>) { + ^bb0(%arg1: f64, %arg2: f64): + linalg.yield %arg1 : f64 + } -> tensor<3x2xf64> + // CHECK: return %[[CST]] + return %1 : tensor<3x2xf64> +} + +// ----- + +// CHECK-LABEL: @transpose_fold_4d_i32 +func.func @transpose_fold_4d_i32(%init: tensor<3x1x4x2xi32>) -> tensor<3x1x4x2xi32> { + %input = arith.constant dense<[[ + [[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11]], + [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]] + ]]> : tensor<1x2x3x4xi32> + // CHECK: %[[CST:.+]] = arith.constant dense<[ + // CHECK-SAME{LITERAL}: [[[0, 12], [1, 13], [2, 14], [3, 15]]], + // CHECK-SAME{LITERAL}: [[[4, 16], [5, 17], [6, 18], [7, 19]]], + // CHECK-SAME{LITERAL}: [[[8, 20], [9, 21], [10, 22], [11, 23]]] + // CHECK-SAME{LITERAL}: ]> + %1 = linalg.generic { + indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d0, d3, d1)>], + iterator_types = ["parallel", "parallel", "parallel", "parallel"] + } ins(%input : tensor<1x2x3x4xi32>) outs(%init : tensor<3x1x4x2xi32>) { + ^bb0(%arg1: i32, %arg2: i32): + linalg.yield %arg1 : i32 + } -> tensor<3x1x4x2xi32> + // 
CHECK: return %[[CST]] + return %1 : tensor<3x1x4x2xi32> +} + +// ----- + +// CHECK-LABEL: @transpose_fold_4d_i16 +func.func @transpose_fold_4d_i16(%init: tensor<3x1x4x2xi16>) -> tensor<3x1x4x2xi16> { + %input = arith.constant dense<[[ + [[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11]], + [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]] + ]]> : tensor<1x2x3x4xi16> + // CHECK: %[[CST:.+]] = arith.constant dense<[ + // CHECK-SAME{LITERAL}: [[[0, 12], [1, 13], [2, 14], [3, 15]]], + // CHECK-SAME{LITERAL}: [[[4, 16], [5, 17], [6, 18], [7, 19]]], + // CHECK-SAME{LITERAL}: [[[8, 20], [9, 21], [10, 22], [11, 23]]] + // CHECK-SAME{LITERAL}: ]> + %1 = linalg.generic { + indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d0, d3, d1)>], + iterator_types = ["parallel", "parallel", "parallel", "parallel"] + } ins(%input : tensor<1x2x3x4xi16>) outs(%init : tensor<3x1x4x2xi16>) { + ^bb0(%arg1: i16, %arg2: i16): + linalg.yield %arg1 : i16 + } -> tensor<3x1x4x2xi16> + // CHECK: return %[[CST]] + return %1 : tensor<3x1x4x2xi16> +} + +// ----- + +// CHECK-LABEL: @transpose_nofold_non_cst_input +func.func @transpose_nofold_non_cst_input(%input: tensor<2x3xf32>, %init: tensor<3x2xf32>) -> tensor<3x2xf32> { + // CHECK: linalg.generic + %1 = linalg.generic { + indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], + iterator_types = ["parallel", "parallel"] + } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + ^bb0(%arg1: f32, %arg2: f32): + linalg.yield %arg1 : f32 + } -> tensor<3x2xf32> + return %1 : tensor<3x2xf32> +} + +// ----- + +// CHECK-LABEL: @transpose_nofold_yield_const +func.func @transpose_nofold_yield_const(%init: tensor<3x2xf32>) -> tensor<3x2xf32> { + %input = arith.constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf32> + %cst = arith.constant 8.0 : f32 + // CHECK: linalg.generic + %1 = linalg.generic { + indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, 
affine_map<(d0, d1) -> (d0, d1)>], + iterator_types = ["parallel", "parallel"] + } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + ^bb0(%arg1: f32, %arg2: f32): + linalg.yield %cst : f32 + } -> tensor<3x2xf32> + return %1 : tensor<3x2xf32> +} + +// ----- + +// CHECK-LABEL: @transpose_nofold_multi_ops_in_region +func.func @transpose_nofold_multi_ops_in_region(%init: tensor<3x2xf32>) -> tensor<3x2xf32> { + %input = arith.constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf32> + // CHECK: linalg.generic + %1 = linalg.generic { + indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], + iterator_types = ["parallel", "parallel"] + } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + ^bb0(%arg1: f32, %arg2: f32): + %add = arith.addf %arg1, %arg1 : f32 + linalg.yield %add : f32 + } -> tensor<3x2xf32> + return %1 : tensor<3x2xf32> +} + +// ----- + +// CHECK-LABEL: @named_transpose_fold_2d_fp32 +func.func @named_transpose_fold_2d_fp32(%init: tensor<3x2xf32>) -> tensor<3x2xf32> { + %input = arith.constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf32> + // CHECK: %[[CST:.+]] = arith.constant + // CHECK-SAME{LITERAL}: dense<[[0.000000e+00, 3.000000e+00], [1.000000e+00, 4.000000e+00], [2.000000e+00, 5.000000e+00]]> : tensor<3x2xf32> + %1 = linalg.transpose ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) permutation = [1, 0] + // CHECK: return %[[CST]] + return %1 : tensor<3x2xf32> +} + +// ----- + + diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir index a9cbaaf7fdc48..8f9b12880adcf 100644 --- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir +++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir @@ -1087,3 +1087,46 @@ func.func @drop_known_unit_constant_low_high(%arg0: tensor<1x383x128xf32>) -> te // CHECK: } : tensor<383x128xf32> to tensor<384x128xf32> // CHECK: tensor.expand_shape %[[PADDED]] // 
CHECK-SAME: {{\[}}[0, 1], [2]] output_shape [1, 384, 128] : tensor<384x128xf32> into tensor<1x384x128xf32> + +// ----- + +// CHECK: #[[$MAP0:.+]] = affine_map<()[s0, s1] -> (s0 * s1)> +// CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> (0, d0)> +// CHECK: #[[$MAP2:.+]] = affine_map<(d0) -> ()> + +// CHECK-LABEL: func @drop_unit_dim_corresponding_to_dynamic_dim +// CHECK-SAME: %[[ARG0:.*]]: tensor<1x?x?x1xf32>, +// CHECK-SAME: %[[ARG1:.*]]: index) -> tensor { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = arith.constant dense<1.000000e+00> : tensor +// CHECK: %[[VAL_3:.*]] = tensor.collapse_shape %[[ARG0]] {{\[\[}}0, 1], [2, 3]] : tensor<1x?x?x1xf32> into tensor +// CHECK: %[[VAL_4:.*]] = tensor.empty(%[[ARG1]]) : tensor +// CHECK: %[[VAL_5:.*]] = affine.apply #[[$MAP0]](){{\[}}%[[ARG1]], %[[VAL_1]]] +// CHECK: %[[VAL_6:.*]] = tensor.empty(%[[VAL_5]]) : tensor +// CHECK: %[[VAL_7:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[VAL_3]], %[[VAL_2]], %[[VAL_4]] : tensor, tensor, tensor) outs(%[[VAL_6]] : tensor) { +// CHECK: ^bb0(%[[VAL_8:.*]]: f32, %[[VAL_9:.*]]: f32, %[[VAL_10:.*]]: f32, %[[VAL_11:.*]]: f32): +// CHECK: %[[VAL_12:.*]] = arith.mulf %[[VAL_8]], %[[VAL_9]] : f32 +// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_10]], %[[VAL_12]] : f32 +// CHECK: linalg.yield %[[VAL_13]] : f32 +// CHECK: } -> tensor +// CHECK: %[[VAL_14:.*]] = tensor.expand_shape %[[VAL_7]] {{\[\[}}0, 1], [2, 3]] output_shape {{\[}}%[[VAL_0]], 1, 61, 1] : tensor into tensor +// CHECK: return %[[VAL_14]] : tensor +// CHECK: } + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)> +module { + func.func @drop_unit_dim_corresponding_to_dynamic_dim(%arg0: tensor<1x?x?x1xf32>, 
%arg1: index) -> tensor { + %cst = arith.constant dense<1.000000e+00> : tensor<1x1x1x1xf32> + %0 = tensor.empty(%arg1) : tensor + %1 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%arg0, %cst : tensor<1x?x?x1xf32>, tensor<1x1x1x1xf32>) outs(%0 : tensor) { + ^bb0(%in: f32, %in_0: f32, %out: f32): + %2 = arith.mulf %in, %in_0 : f32 + %3 = arith.addf %out, %2 : f32 + linalg.yield %3 : f32 + } -> tensor + return %1 : tensor + } +} diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir index 15a4f6cdd3bbe..e45a9fbb1052c 100644 --- a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir +++ b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir @@ -777,139 +777,6 @@ func.func @fuse_scalar_constant(%arg0 : tensor) -> (tensor, te // ----- -// CHECK-LABEL: @transpose_fold_2d_fp32 -func.func @transpose_fold_2d_fp32(%init: tensor<3x2xf32>) -> tensor<3x2xf32> { - %input = arith.constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf32> - // CHECK: %[[CST:.+]] = arith.constant - // CHECK-SAME{LITERAL}: dense<[[0.000000e+00, 3.000000e+00], [1.000000e+00, 4.000000e+00], [2.000000e+00, 5.000000e+00]]> : tensor<3x2xf32> - %1 = linalg.generic { - indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], - iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { - ^bb0(%arg1: f32, %arg2: f32): - linalg.yield %arg1 : f32 - } -> tensor<3x2xf32> - // CHECK: return %[[CST]] - return %1 : tensor<3x2xf32> -} - -// ----- - -// CHECK-LABEL: @transpose_fold_2d_fp64 -func.func @transpose_fold_2d_fp64(%init: tensor<3x2xf64>) -> tensor<3x2xf64> { - %input = arith.constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf64> - // CHECK: %[[CST:.+]] = arith.constant - // CHECK-SAME{LITERAL}: dense<[[0.000000e+00, 
3.000000e+00], [1.000000e+00, 4.000000e+00], [2.000000e+00, 5.000000e+00]]> : tensor<3x2xf64> - %1 = linalg.generic { - indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], - iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf64>) outs(%init : tensor<3x2xf64>) { - ^bb0(%arg1: f64, %arg2: f64): - linalg.yield %arg1 : f64 - } -> tensor<3x2xf64> - // CHECK: return %[[CST]] - return %1 : tensor<3x2xf64> -} - -// ----- - -// CHECK-LABEL: @transpose_fold_4d_i32 -func.func @transpose_fold_4d_i32(%init: tensor<3x1x4x2xi32>) -> tensor<3x1x4x2xi32> { - %input = arith.constant dense<[[ - [[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11]], - [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]] - ]]> : tensor<1x2x3x4xi32> - // CHECK: %[[CST:.+]] = arith.constant dense<[ - // CHECK-SAME{LITERAL}: [[[0, 12], [1, 13], [2, 14], [3, 15]]], - // CHECK-SAME{LITERAL}: [[[4, 16], [5, 17], [6, 18], [7, 19]]], - // CHECK-SAME{LITERAL}: [[[8, 20], [9, 21], [10, 22], [11, 23]]] - // CHECK-SAME{LITERAL}: ]> - %1 = linalg.generic { - indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d0, d3, d1)>], - iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%input : tensor<1x2x3x4xi32>) outs(%init : tensor<3x1x4x2xi32>) { - ^bb0(%arg1: i32, %arg2: i32): - linalg.yield %arg1 : i32 - } -> tensor<3x1x4x2xi32> - // CHECK: return %[[CST]] - return %1 : tensor<3x1x4x2xi32> -} - -// ----- - -// CHECK-LABEL: @transpose_fold_4d_i16 -func.func @transpose_fold_4d_i16(%init: tensor<3x1x4x2xi16>) -> tensor<3x1x4x2xi16> { - %input = arith.constant dense<[[ - [[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11]], - [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]] - ]]> : tensor<1x2x3x4xi16> - // CHECK: %[[CST:.+]] = arith.constant dense<[ - // CHECK-SAME{LITERAL}: [[[0, 12], [1, 13], [2, 14], [3, 15]]], - // CHECK-SAME{LITERAL}: [[[4, 16], [5, 17], [6, 18], [7, 19]]], - // 
CHECK-SAME{LITERAL}: [[[8, 20], [9, 21], [10, 22], [11, 23]]] - // CHECK-SAME{LITERAL}: ]> - %1 = linalg.generic { - indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d0, d3, d1)>], - iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%input : tensor<1x2x3x4xi16>) outs(%init : tensor<3x1x4x2xi16>) { - ^bb0(%arg1: i16, %arg2: i16): - linalg.yield %arg1 : i16 - } -> tensor<3x1x4x2xi16> - // CHECK: return %[[CST]] - return %1 : tensor<3x1x4x2xi16> -} - -// ----- - -// CHECK-LABEL: @transpose_nofold_non_cst_input -func.func @transpose_nofold_non_cst_input(%input: tensor<2x3xf32>, %init: tensor<3x2xf32>) -> tensor<3x2xf32> { - // CHECK: linalg.generic - %1 = linalg.generic { - indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], - iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { - ^bb0(%arg1: f32, %arg2: f32): - linalg.yield %arg1 : f32 - } -> tensor<3x2xf32> - return %1 : tensor<3x2xf32> -} - -// ----- - -// CHECK-LABEL: @transpose_nofold_yield_const -func.func @transpose_nofold_yield_const(%init: tensor<3x2xf32>) -> tensor<3x2xf32> { - %input = arith.constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf32> - %cst = arith.constant 8.0 : f32 - // CHECK: linalg.generic - %1 = linalg.generic { - indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], - iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { - ^bb0(%arg1: f32, %arg2: f32): - linalg.yield %cst : f32 - } -> tensor<3x2xf32> - return %1 : tensor<3x2xf32> -} - -// ----- - -// CHECK-LABEL: @transpose_nofold_multi_ops_in_region -func.func @transpose_nofold_multi_ops_in_region(%init: tensor<3x2xf32>) -> tensor<3x2xf32> { - %input = arith.constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf32> - // CHECK: linalg.generic - %1 = linalg.generic { - 
indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], - iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { - ^bb0(%arg1: f32, %arg2: f32): - %add = arith.addf %arg1, %arg1 : f32 - linalg.yield %add : f32 - } -> tensor<3x2xf32> - return %1 : tensor<3x2xf32> -} - -// ----- - // Fusing the broadcast into a reduction would require to insert extra knowledge // about the size of the reduction dimension. As long, as this is not // implemented, we check that two linalg operations remain. diff --git a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir index 4f43ec2c9e1ce..31fac9b4b4165 100644 --- a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir @@ -204,6 +204,37 @@ func.func @conv_1d_ncw_fcw(%input: memref, %filter: memref // ----- +func.func @conv_2d_ngchw_gfchw_q(%input: memref, %filter: memref, %inputzp: i32, %filterzp: i32, %output: memref) { + linalg.conv_2d_ngchw_gfchw_q {dilations = dense<1> : tensor<2xi64>, + strides = dense<1> : tensor<2xi64>} + ins (%input, %filter, %inputzp, %filterzp: memref, memref, i32, i32) + outs (%output: memref) + return +} +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 + d6, d4 + d7)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> ()> +// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> + +// CHECK: func @conv_2d_ngchw_gfchw_q + +// CHECK: linalg.generic +// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP2]], #[[MAP3]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} +// CHECK-SAME: ins(%{{.+}}, %{{.+}}, %{{.+}}, 
%{{.+}} : memref, memref, i32, i32) +// CHECK-SAME: outs(%{{.+}} : memref) + +// CHECK: ^{{.+}}(%[[BBARG0:.+]]: i8, %[[BBARG1:.+]]: i8, %[[BBARG2:.+]]: i32, %[[BBARG3:.+]]: i32, %[[BBARG4:.+]]: i32) +// CHECK-NEXT: %[[EXTSI0:.+]] = arith.extsi %[[BBARG0]] : i8 to i32 +// CHECK-NEXT: %[[SUB0:.+]] = arith.subi %[[EXTSI0]], %[[BBARG2]] : i32 +// CHECK-NEXT: %[[EXTSI1:.+]] = arith.extsi %[[BBARG1]] : i8 to i32 +// CHECK-NEXT: %[[SUB1:.+]] = arith.subi %[[EXTSI1]], %[[BBARG3]] : i32 +// CHECK-NEXT: %[[MUL:.+]] = arith.muli %[[SUB0]], %[[SUB1]] : i32 +// CHECK-NEXT: %[[ADD:.+]] = arith.addi %[[BBARG4]], %[[MUL]] : i32 +// CHECK-NEXT: linalg.yield %[[ADD]] : i32 + +// ----- + func.func @generalize_fill(%output: memref, %value : f32) { linalg.fill ins(%value : f32) outs(%output : memref) return diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir index 051054e67edf0..02ecbed232c8b 100644 --- a/mlir/test/Dialect/Linalg/named-ops.mlir +++ b/mlir/test/Dialect/Linalg/named-ops.mlir @@ -441,6 +441,21 @@ func.func @conv_2d_ngchw_gfchw(%input: tensor<1x5x3x32x32xf32>, %filter: tensor< // ----- +// CHECK-LABEL: func @conv_2d_ngchw_gfchw_q +func.func @conv_2d_ngchw_gfchw_q(%input: tensor<1x5x3x32x32xi8>, %filter: tensor<5x2x3x3x3xi8>, %inputzp: i32, %filterzp: i32, %init: tensor<1x5x2x30x30xi32>) -> tensor<1x5x2x30x30xi32> { + // CHECK: linalg.conv_2d_ngchw_gfchw_q + // CHECK-SAME: dilations = dense<1> : tensor<2xi64> + // CHECK-SAME: strides = dense<1> : tensor<2xi64> + // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x5x3x32x32xi8>, tensor<5x2x3x3x3xi8>, i32, i32) + // CHECK-SAME: outs(%{{.+}} : tensor<1x5x2x30x30xi32>) -> tensor<1x5x2x30x30xi32> + %0 = linalg.conv_2d_ngchw_gfchw_q {dilations = dense<1> : tensor<2xi64>, + strides = dense<1> : tensor<2xi64>} + ins (%input, %filter, %inputzp, %filterzp: tensor<1x5x3x32x32xi8>, tensor<5x2x3x3x3xi8>, i32, i32) + outs (%init: tensor<1x5x2x30x30xi32>) -> tensor<1x5x2x30x30xi32> + return %0 : 
tensor<1x5x2x30x30xi32> +} +// ----- + // CHECK-LABEL: func @conv_3d_ndhwc_dhwcf func.func @conv_3d_ndhwc_dhwcf(%input: tensor, %filter: tensor, %init: tensor) -> tensor { // CHECK: %{{.+}} = linalg.conv_3d_ndhwc_dhwcf diff --git a/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir b/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir index 31e9fd00cffa0..9849f36285b16 100644 --- a/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir @@ -407,3 +407,95 @@ module attributes {transform.with_named_sequence} { transform.yield } } + +// ----- +// Checks we use nan as the neutral element for maxnumf op. +func.func @generic_split_maxnumf(%in: tensor<32xf32>, %out: tensor) -> tensor { + %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, + affine_map<(d0) -> ()>], + iterator_types = ["reduction"]} + ins(%in : tensor<32xf32>) + outs(%out : tensor) { + ^bb0(%arg1: f32, %arg2: f32): + %y = arith.maxnumf %arg1, %arg2 : f32 + linalg.yield %y : f32 + } -> tensor + return %r : tensor +} + +// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)> +// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)> +// CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0) -> (d0)> +// CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0) -> ()> +// CHECK-LABEL: func @generic_split_maxnumf +// The float value 0xFFC00000 that is filled into the init tensor represents negative NaN. 
+// CHECK-DAG: %[[ID:.*]] = arith.constant 0xFFC00000 : f32 +// CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1]] output_shape [8, 4] : tensor<32xf32> into tensor<8x4xf32> +// CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<4xf32> +// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<4xf32>) -> tensor<4xf32> +// CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} +// CHECK-SAME: ins(%[[I1]] : tensor<8x4xf32>) outs(%[[F]] : tensor<4xf32>) { +// CHECK: arith.maxnumf +// CHECK: linalg.yield +// CHECK: } -> tensor<4xf32> +// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP3]]], iterator_types = ["reduction"]} +// CHECK-SAME: ins(%[[G]] : tensor<4xf32>) outs(%{{.*}} : tensor) { +// CHECK: arith.maxnumf {{.*}} +// CHECK: linalg.yield +// CHECK: } -> tensor +// CHECK: return %[[R]] : tensor + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1:4 = transform.structured.split_reduction %0 { split_factor = 4, insert_split_dimension = 0, inner_parallel} + : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- +// Checks we use nan as the neutral element for minnumf op. 
+func.func @generic_split_minnumf(%in: tensor<32xf32>, %out: tensor) -> tensor { + %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, + affine_map<(d0) -> ()>], + iterator_types = ["reduction"]} + ins(%in : tensor<32xf32>) + outs(%out : tensor) { + ^bb0(%arg1: f32, %arg2: f32): + %y = arith.minnumf %arg1, %arg2 : f32 + linalg.yield %y : f32 + } -> tensor + return %r : tensor +} + +// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)> +// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)> +// CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0) -> (d0)> +// CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0) -> ()> +// CHECK-LABEL: func @generic_split_minnumf +// The float value 0x7FC00000 that is filled into the init tensor represents positive NaN. +// CHECK-DAG: %[[ID:.*]] = arith.constant 0x7FC00000 : f32 +// CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1]] output_shape [8, 4] : tensor<32xf32> into tensor<8x4xf32> +// CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<4xf32> +// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<4xf32>) -> tensor<4xf32> +// CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} +// CHECK-SAME: ins(%[[I1]] : tensor<8x4xf32>) outs(%[[F]] : tensor<4xf32>) { +// CHECK: arith.minnumf +// CHECK: linalg.yield +// CHECK: } -> tensor<4xf32> +// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP3]]], iterator_types = ["reduction"]} +// CHECK-SAME: ins(%[[G]] : tensor<4xf32>) outs(%{{.*}} : tensor) { +// CHECK: arith.minnumf {{.*}} +// CHECK: linalg.yield +// CHECK: } -> tensor +// CHECK: return %[[R]] : tensor + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1:4 = transform.structured.split_reduction %0 { 
split_factor = 4, insert_split_dimension = 0, inner_parallel} + : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} diff --git a/mlir/test/Dialect/Shape/bufferize.mlir b/mlir/test/Dialect/Shape/bufferize.mlir index 963a5e8bcf578..9f30a052208f0 100644 --- a/mlir/test/Dialect/Shape/bufferize.mlir +++ b/mlir/test/Dialect/Shape/bufferize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -split-input-file -shape-bufferize <%s | FileCheck %s +// RUN: mlir-opt -split-input-file --one-shot-bufferize="dialect-filter=shape,bufferization copy-before-write unknown-type-conversion=identity-layout-map allow-unknown-ops" <%s | FileCheck %s // ----- diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index f23f6ac4f181e..ff0fb22431d69 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -144,7 +144,7 @@ func.func @sparse_new3d(%arg0: !llvm.ptr) -> tensor { // CHECK-DAG: %[[Iota:.*]] = memref.cast %[[Iota0]] : memref<2xindex> to memref // CHECK-DAG: memref.store %[[I]], %[[Sizes0]][%[[C0]]] : memref<2xindex> // CHECK-DAG: memref.store %[[J]], %[[Sizes0]][%[[C1]]] : memref<2xindex> -// CHECK: %[[NP:.*]] = llvm.mlir.zero : !llvm.ptr +// CHECK-DAG: %[[NP:.*]] = llvm.mlir.zero : !llvm.ptr // CHECK: %[[T:.*]] = call @newSparseTensor(%[[Sizes]], %[[Sizes]], %[[LvlTypes]], %[[Iota]], %[[Iota]], %{{.*}}, %{{.*}}, %{{.*}}, %[[Empty]], %[[NP]]) // CHECK: return %[[T]] : !llvm.ptr func.func @sparse_init(%arg0: index, %arg1: index) -> tensor { diff --git a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir index 6e8a26762d90f..df3e4b0ed60c7 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir @@ -6,6 +6,7 @@ // CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr, // CHECK-SAME: %[[VAL_1:.*]]: 
!llvm.ptr) -> !llvm.ptr { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK-DAG: %[[ZERO:.*]] = llvm.mlir.zero : !llvm.ptr // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : i32 // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : i32 // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index @@ -27,8 +28,7 @@ // CHECK: %[[VAL_17:.*]] = memref.cast %[[VAL_16]] : memref<2xindex> to memref // CHECK: memref.store %[[VAL_5]], %[[VAL_16]]{{\[}}%[[VAL_5]]] : memref<2xindex> // CHECK: memref.store %[[VAL_6]], %[[VAL_16]]{{\[}}%[[VAL_6]]] : memref<2xindex> -// CHECK: %[[VAL_18:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[VAL_19:.*]] = call @newSparseTensor(%[[VAL_15]], %[[VAL_15]], %[[VAL_13]], %[[VAL_17]], %[[VAL_17]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_4]], %[[VAL_18]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[VAL_19:.*]] = call @newSparseTensor(%[[VAL_15]], %[[VAL_15]], %[[VAL_13]], %[[VAL_17]], %[[VAL_17]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_4]], %[[ZERO]]) : (memref, memref, memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr // CHECK: %[[VAL_20:.*]] = memref.alloc() : memref<300xf64> // CHECK: %[[VAL_21:.*]] = memref.cast %[[VAL_20]] : memref<300xf64> to memref // CHECK: %[[VAL_22:.*]] = memref.alloc() : memref<300xi1> diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir index 6112856fbf293..c27df00785522 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir @@ -4,8 +4,7 @@ // RUN: FileCheck %s --check-prefix=CHECK-MIR // // RUN: mlir-opt %s --sparse-reinterpret-map -sparsification --sparse-tensor-conversion --cse \ -// RUN: --func-bufferize --arith-bufferize \ -// RUN: --tensor-bufferize --finalizing-bufferize | \ +// RUN: --one-shot-bufferize="copy-before-write bufferize-function-boundaries 
function-boundary-type-conversion=identity-layout-map" | \ // RUN: FileCheck %s --check-prefix=CHECK-LIR #CSR = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : dense, d1 : compressed)}> diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir index 401da152a8bdb..9fbb9dd0a26d1 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir @@ -4,8 +4,7 @@ // RUN: FileCheck %s --check-prefix=CHECK-MIR // // RUN: mlir-opt %s --sparse-reinterpret-map -sparsification --sparse-tensor-conversion --cse \ -// RUN: --func-bufferize --arith-bufferize \ -// RUN: --tensor-bufferize --finalizing-bufferize | \ +// RUN: --one-shot-bufferize="copy-before-write bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" | \ // RUN: FileCheck %s --check-prefix=CHECK-LIR #CSC = #sparse_tensor.encoding<{ diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir index d769876d8ee8e..a827360abb426 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir @@ -4,8 +4,7 @@ // RUN: FileCheck %s --check-prefix=CHECK-MIR // // RUN: mlir-opt %s --sparse-reinterpret-map -sparsification --sparse-tensor-conversion --cse \ -// RUN: --func-bufferize --arith-bufferize \ -// RUN: --tensor-bufferize --finalizing-bufferize | \ +// RUN: --one-shot-bufferize="copy-before-write bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" | \ // RUN: FileCheck %s --check-prefix=CHECK-LIR #CSR = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : dense, d1 : compressed)}> diff --git a/mlir/test/Dialect/SparseTensor/specifier_to_llvm.mlir b/mlir/test/Dialect/SparseTensor/specifier_to_llvm.mlir index b647fe0cdeed0..00ff29125fb5f 100644 --- a/mlir/test/Dialect/SparseTensor/specifier_to_llvm.mlir 
+++ b/mlir/test/Dialect/SparseTensor/specifier_to_llvm.mlir @@ -3,12 +3,12 @@ #CSR = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : dense, d1 : compressed)}> // CHECK-LABEL: func.func @sparse_metadata_init() -> !llvm.struct<(array<2 x i64>, array<3 x i64>)> { -// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 -// CHECK: %[[VAL_1:.*]] = llvm.mlir.undef : !llvm.struct<(array<2 x i64>, array<3 x i64>)> -// CHECK: %[[VAL_2:.*]] = llvm.insertvalue %[[VAL_0]], %[[VAL_1]][1, 0] : !llvm.struct<(array<2 x i64>, array<3 x i64>)> -// CHECK: %[[VAL_3:.*]] = llvm.insertvalue %[[VAL_0]], %[[VAL_2]][1, 1] : !llvm.struct<(array<2 x i64>, array<3 x i64>)> -// CHECK: %[[VAL_4:.*]] = llvm.insertvalue %[[VAL_0]], %[[VAL_3]][1, 2] : !llvm.struct<(array<2 x i64>, array<3 x i64>)> -// CHECK: return %[[VAL_4]] : !llvm.struct<(array<2 x i64>, array<3 x i64>)> +// CHECK-DAG: %[[STRUCT:.*]] = llvm.mlir.undef : !llvm.struct<(array<2 x i64>, array<3 x i64>)> +// CHECK-DAG: %[[CST0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = llvm.insertvalue %[[CST0]], %[[STRUCT]][1, 0] : !llvm.struct<(array<2 x i64>, array<3 x i64>)> +// CHECK: %[[VAL_2:.*]] = llvm.insertvalue %[[CST0]], %[[VAL_1]][1, 1] : !llvm.struct<(array<2 x i64>, array<3 x i64>)> +// CHECK: %[[VAL_3:.*]] = llvm.insertvalue %[[CST0]], %[[VAL_2]][1, 2] : !llvm.struct<(array<2 x i64>, array<3 x i64>)> +// CHECK: return %[[VAL_3]] : !llvm.struct<(array<2 x i64>, array<3 x i64>)> // CHECK: } func.func @sparse_metadata_init() -> !sparse_tensor.storage_specifier<#CSR> { %0 = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier<#CSR> diff --git a/mlir/test/Dialect/Tensor/bufferize.mlir b/mlir/test/Dialect/Tensor/bufferize.mlir index 4f553adcc500f..e85d9e740adf4 100644 --- a/mlir/test/Dialect/Tensor/bufferize.mlir +++ b/mlir/test/Dialect/Tensor/bufferize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -tensor-bufferize -cse -split-input-file | FileCheck %s +// RUN: mlir-opt %s 
--one-shot-bufferize="dialect-filter=tensor,bufferization copy-before-write unknown-type-conversion=identity-layout-map" -cse -split-input-file | FileCheck %s // CHECK-LABEL: func @dim( // CHECK-SAME: %[[TENSOR:.*]]: tensor<*xf32>, diff --git a/mlir/test/Dialect/Vector/bufferize-invalid.mlir b/mlir/test/Dialect/Vector/bufferize-invalid.mlir index 1ae3e312c868f..bcca50a0fe79a 100644 --- a/mlir/test/Dialect/Vector/bufferize-invalid.mlir +++ b/mlir/test/Dialect/Vector/bufferize-invalid.mlir @@ -1,5 +1,4 @@ -// RUN: mlir-opt %s -vector-bufferize -split-input-file -verify-diagnostics -// | FileCheck %s +// RUN: mlir-opt %s --one-shot-bufferize="dialect-filter=vector,bufferization copy-before-write unknown-type-conversion=identity-layout-map allow-unknown-ops" -split-input-file -verify-diagnostics // CHECK-LABEL: func @mask( func.func @mask(%t0: tensor, %val: vector<16xf32>, %idx: index, %m0: vector<16xi1>) -> tensor { diff --git a/mlir/test/Dialect/Vector/bufferize.mlir b/mlir/test/Dialect/Vector/bufferize.mlir index 6a6a8fa8938bc..3399f60a2c3bf 100644 --- a/mlir/test/Dialect/Vector/bufferize.mlir +++ b/mlir/test/Dialect/Vector/bufferize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -vector-bufferize -split-input-file | FileCheck %s +// RUN: mlir-opt %s --one-shot-bufferize="dialect-filter=vector,bufferization copy-before-write unknown-type-conversion=identity-layout-map" -split-input-file | FileCheck %s // CHECK-LABEL: func @transfer_read( // CHECK-SAME: %[[t:.*]]: tensor, %[[o1:.*]]: index, %[[o2:.*]]: index, %[[pad:.*]]: f32) diff --git a/mlir/test/Dialect/Vector/linearize.mlir b/mlir/test/Dialect/Vector/linearize.mlir index b29ceab5783d7..31a59b809a74b 100644 --- a/mlir/test/Dialect/Vector/linearize.mlir +++ b/mlir/test/Dialect/Vector/linearize.mlir @@ -245,3 +245,32 @@ func.func @test_vector_extract(%arg0: vector<2x8x2xf32>) -> vector<8x2xf32> { %0 = vector.extract %arg0[1]: vector<8x2xf32> from vector<2x8x2xf32> return %0 : vector<8x2xf32> } + +// ----- +// ALL-LABEL: 
test_vector_insert +// ALL-SAME: (%[[DEST:.*]]: vector<2x8x4xf32>, %[[SRC:.*]]: vector<8x4xf32>) -> vector<2x8x4xf32> { +func.func @test_vector_insert(%arg0: vector<2x8x4xf32>, %arg1: vector<8x4xf32>) -> vector<2x8x4xf32> { + // DEFAULT: %[[ARG_SRC:.*]] = vector.shape_cast %[[SRC]] : vector<8x4xf32> to vector<32xf32> + // DEFAULT: %[[ARG_DEST:.*]] = vector.shape_cast %[[DEST]] : vector<2x8x4xf32> to vector<64xf32> + // DEFAULT: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG_DEST]], %[[ARG_SRC]] + // DEFAULT-SAME: [64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, + // DEFAULT-SAME: 88, 89, 90, 91, 92, 93, 94, 95, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + // DEFAULT-SAME: 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63] : vector<64xf32>, vector<32xf32> + // DEFAULT: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<64xf32> to vector<2x8x4xf32> + // DEFAULT: return %[[RES]] : vector<2x8x4xf32> + + // BW-128: %[[ARG_SRC:.*]] = vector.shape_cast %[[SRC]] : vector<8x4xf32> to vector<32xf32> + // BW-128: %[[ARG_DEST:.*]] = vector.shape_cast %[[DEST]] : vector<2x8x4xf32> to vector<64xf32> + // BW-128: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG_DEST]], %[[ARG_SRC]] + // BW-128-SAME: [64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, + // BW-128-SAME: 88, 89, 90, 91, 92, 93, 94, 95, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + // BW-128-SAME: 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63] : vector<64xf32>, vector<32xf32> + // BW-128: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<64xf32> to vector<2x8x4xf32> + // BW-128: return %[[RES]] : vector<2x8x4xf32> + + // BW-0: %[[RES:.*]] = vector.insert %[[SRC]], %[[DEST]] [0] : vector<8x4xf32> into vector<2x8x4xf32> + // BW-0: return %[[RES]] : vector<2x8x4xf32> + + %0 = vector.insert %arg1, %arg0[0]: vector<8x4xf32> into vector<2x8x4xf32> + return %0 : 
vector<2x8x4xf32> +} diff --git a/mlir/test/Examples/NVGPU/tools/nvdsl.py b/mlir/test/Examples/NVGPU/tools/nvdsl.py index 600cae5b47eee..90dbb2355e1c8 100644 --- a/mlir/test/Examples/NVGPU/tools/nvdsl.py +++ b/mlir/test/Examples/NVGPU/tools/nvdsl.py @@ -431,7 +431,7 @@ def __str__(self): # saveIR(module) # Verify the module - # module.operation.verify() + module.operation.verify() # Compile and JIT MLIR module options = f"cubin-chip=sm_90a cubin-features=+ptx80 opt-level=3" diff --git a/mlir/test/Examples/transform/ChH/full.mlir b/mlir/test/Examples/transform/ChH/full.mlir index f8d910370bc27..259475ebdbf49 100644 --- a/mlir/test/Examples/transform/ChH/full.mlir +++ b/mlir/test/Examples/transform/ChH/full.mlir @@ -380,27 +380,29 @@ module attributes { transform.with_named_sequence } { // immediately adjacent fma on vector<64xf32>. // CHECK: %[[R0:.+]] = llvm.mlir.undef : !llvm.array<5 x vector<64xf32>> -// CHECK-NEXT: %[[LINE0:.+]] = llvm.extractvalue %[[V:.+]][0] : !llvm.array<5 x vector<64xf32>> + +// CHECK: %[[V:.+]] = llvm.load %{{.*}} : !llvm.ptr -> !llvm.array<5 x vector<64xf32>> +// CHECK-NEXT: %[[LINE0:.+]] = llvm.extractvalue %[[V]][0] : !llvm.array<5 x vector<64xf32>> // CHECK-NEXT: %[[FMA0:.+]] = llvm.intr.fma(%{{.*}}, %{{.*}}, %[[LINE0]]) // CHECK-SAME: -> vector<64xf32> // CHECK-NEXT: %[[R1:.+]] = llvm.insertvalue %[[FMA0]], %[[R0]][0] -// CHECK-NEXT: %[[LINE1:.+]] = llvm.extractvalue %[[V:.+]][1] : !llvm.array<5 x vector<64xf32>> +// CHECK-NEXT: %[[LINE1:.+]] = llvm.extractvalue %[[V]][1] : !llvm.array<5 x vector<64xf32>> // CHECK-NEXT: %[[FMA1:.+]] = llvm.intr.fma(%{{.*}}, %{{.*}}, %[[LINE1]]) // CHECK-SAME: -> vector<64xf32> // CHECK-NEXT: %[[R2:.+]] = llvm.insertvalue %[[FMA1]], %[[R1]][1] -// CHECK-NEXT: %[[LINE2:.+]] = llvm.extractvalue %[[V:.+]][2] : !llvm.array<5 x vector<64xf32>> +// CHECK-NEXT: %[[LINE2:.+]] = llvm.extractvalue %[[V]][2] : !llvm.array<5 x vector<64xf32>> // CHECK-NEXT: %[[FMA2:.+]] = llvm.intr.fma(%{{.*}}, %{{.*}}, 
%[[LINE2]]) // CHECK-SAME: -> vector<64xf32> // CHECK-NEXT: %[[R3:.+]] = llvm.insertvalue %[[FMA2]], %[[R2]][2] -// CHECK-NEXT: %[[LINE3:.+]] = llvm.extractvalue %[[V:.+]][3] : !llvm.array<5 x vector<64xf32>> +// CHECK-NEXT: %[[LINE3:.+]] = llvm.extractvalue %[[V]][3] : !llvm.array<5 x vector<64xf32>> // CHECK-NEXT: %[[FMA3:.+]] = llvm.intr.fma(%{{.*}}, %{{.*}}, %[[LINE3]]) // CHECK-SAME: -> vector<64xf32> // CHECK-NEXT: %[[R4:.+]] = llvm.insertvalue %[[FMA3]], %[[R3]][3] -// CHECK-NEXT: %[[LINE4:.+]] = llvm.extractvalue %[[V:.+]][4] : !llvm.array<5 x vector<64xf32>> +// CHECK-NEXT: %[[LINE4:.+]] = llvm.extractvalue %[[V]][4] : !llvm.array<5 x vector<64xf32>> // CHECK-NEXT: %[[FMA4:.+]] = llvm.intr.fma(%{{.*}}, %{{.*}}, %[[LINE4]]) // CHECK-SAME: -> vector<64xf32> // CHECK-NEXT: %[[R5:.+]] = llvm.insertvalue %[[FMA4]], %[[R4]][4] diff --git a/mlir/test/Integration/Dialect/Complex/CPU/correctness.mlir b/mlir/test/Integration/Dialect/Complex/CPU/correctness.mlir index b0e414d157268..5d27c3e290d50 100644 --- a/mlir/test/Integration/Dialect/Complex/CPU/correctness.mlir +++ b/mlir/test/Integration/Dialect/Complex/CPU/correctness.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: -func-bufferize -tensor-bufferize -arith-bufferize --canonicalize \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" --canonicalize \ // RUN: -convert-scf-to-cf --convert-complex-to-standard \ // RUN: -finalize-memref-to-llvm -convert-math-to-llvm -convert-math-to-libm \ // RUN: -convert-vector-to-llvm -convert-complex-to-llvm \ diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir index 43e423d4c3e8e..734e09b7ed103 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir @@ -1,10 +1,10 @@ -// RUN: mlir-opt %s -linalg-bufferize \ -// RUN: -arith-bufferize -tensor-bufferize -func-bufferize \ +// 
RUN: mlir-opt %s \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ // RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref \ // RUN: -convert-scf-to-cf -expand-strided-metadata -lower-affine -convert-cf-to-llvm -convert-arith-to-llvm \ // RUN: -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_runner_utils \ +// RUN: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils \ // RUN: | FileCheck %s diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-elementwise.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-elementwise.mlir index 84dad567ced3f..a323b0d9f876c 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-elementwise.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-elementwise.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s -convert-elementwise-to-linalg \ -// RUN: -arith-bufferize -linalg-bufferize -tensor-bufferize -func-bufferize \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ // RUN: -canonicalize -buffer-deallocation-pipeline -convert-bufferization-to-memref -convert-linalg-to-loops \ // RUN: -convert-scf-to-cf -convert-arith-to-llvm -convert-cf-to-llvm --finalize-memref-to-llvm \ // RUN: -convert-func-to-llvm -reconcile-unrealized-casts | \ diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir index db882f7a54d39..45283e173c9f0 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir @@ -1,10 +1,10 @@ -// RUN: mlir-opt %s -linalg-bufferize \ -// RUN: -arith-bufferize -tensor-bufferize -func-bufferize \ +// RUN: mlir-opt %s \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ // RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref \ 
// RUN: -convert-scf-to-cf -expand-strided-metadata -lower-affine -convert-cf-to-llvm -convert-arith-to-llvm \ // RUN: -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_runner_utils \ +// RUN: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils \ // RUN: | FileCheck %s diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir index 54a2bbf8d4680..23a07464bb5be 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir @@ -1,6 +1,5 @@ // RUN: mlir-opt %s -test-linalg-transform-patterns=test-linalg-to-vector-patterns \ -// RUN: -empty-tensor-to-alloc-tensor -linalg-bufferize -arith-bufferize \ -// RUN: -bufferization-bufferize -tensor-bufferize -func-bufferize \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ // RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref \ // RUN: -convert-linalg-to-loops -convert-scf-to-cf -expand-strided-metadata \ // RUN: -lower-affine -convert-arith-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir index 98fce6c020c03..01a0ba26fd7cd 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -linalg-bufferize \ -// RUN: -arith-bufferize -tensor-bufferize -func-bufferize \ +// RUN: mlir-opt %s \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ // RUN: -finalizing-bufferize -buffer-deallocation-pipeline 
-convert-bufferization-to-memref \ // RUN: -convert-linalg-to-loops -convert-scf-to-cf -expand-strided-metadata \ // RUN: -lower-affine -convert-arith-to-llvm --finalize-memref-to-llvm \ diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir index cf7d0c762ea36..73d4aff73fb7a 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -linalg-bufferize \ -// RUN: -arith-bufferize -tensor-bufferize -func-bufferize \ +// RUN: mlir-opt %s \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ // RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref \ // RUN: -convert-linalg-to-loops -convert-scf-to-cf -expand-strided-metadata \ // RUN: -lower-affine -convert-arith-to-llvm --finalize-memref-to-llvm \ diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir index 38b49cd444df3..ff9ddedf91e17 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir @@ -1,5 +1,6 @@ -// RUN: mlir-opt %s -arith-bufferize -linalg-bufferize \ -// RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref -convert-linalg-to-loops \ +// RUN: mlir-opt %s \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ +// RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref -convert-linalg-to-loops \ // RUN: -convert-arith-to-llvm -convert-scf-to-cf -convert-cf-to-llvm --finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_runner_utils \ diff --git 
a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir index 41296cdfcb2d5..698191577efe3 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir @@ -1,14 +1,14 @@ // UNSUPPORTED: asan -// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -linalg-bufferize -arith-bufferize \ -// RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref -convert-linalg-to-loops -convert-scf-to-cf \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ +// RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -expand-strided-metadata -lower-affine -convert-arith-to-llvm -convert-scf-to-cf --finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -transform-interpreter -test-transform-dialect-erase-schedule -linalg-bufferize \ -// RUN: -scf-bufferize -arith-bufferize -tensor-bufferize \ -// RUN: -func-bufferize \ +// RUN: mlir-opt %s -transform-interpreter -test-transform-dialect-erase-schedule \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ // RUN: -finalizing-bufferize -convert-linalg-to-loops -convert-scf-to-cf -convert-scf-to-cf \ // RUN: -expand-strided-metadata -lower-affine -convert-arith-to-llvm -convert-scf-to-cf --finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ diff --git a/mlir/test/Integration/Dialect/Memref/print-memref.mlir b/mlir/test/Integration/Dialect/Memref/print-memref.mlir index 
b83f3919efd83..f59e220d7461e 100644 --- a/mlir/test/Integration/Dialect/Memref/print-memref.mlir +++ b/mlir/test/Integration/Dialect/Memref/print-memref.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: -func-bufferize -arith-bufferize --canonicalize \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" --canonicalize \ // RUN: -finalize-memref-to-llvm\ // RUN: -convert-func-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner \ diff --git a/mlir/test/Integration/Dialect/Memref/verify-memref.mlir b/mlir/test/Integration/Dialect/Memref/verify-memref.mlir index b7e2a46688f47..431ae0a89d20c 100644 --- a/mlir/test/Integration/Dialect/Memref/verify-memref.mlir +++ b/mlir/test/Integration/Dialect/Memref/verify-memref.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: -func-bufferize -arith-bufferize --canonicalize \ +// RUN: -func-bufferize -one-shot-bufferize="bufferize-function-boundaries" --canonicalize \ // RUN: -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm\ // RUN: -convert-func-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner \ diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir index faa129efa63a9..a7c5b91273423 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir @@ -1,5 +1,6 @@ -// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-cf \ -// RUN: -arith-bufferize -convert-vector-to-llvm="enable-amx" \ +// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ +// RUN: -convert-scf-to-cf -convert-vector-to-llvm="enable-amx" \ // RUN: -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-translate -mlir-to-llvmir | \ // RUN: %lli --entry-function=entry 
--mattr="+amx-tile,+amx-int8,+amx-bf16" \ diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-full.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-full.mlir index 3ed28fc68acb8..7b7ee54db8c34 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-full.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-full.mlir @@ -1,5 +1,7 @@ -// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-cf \ -// RUN: -arith-bufferize -convert-vector-to-llvm="enable-amx" \ +// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ +// RUN: -convert-scf-to-cf \ +// RUN: -convert-vector-to-llvm="enable-amx" \ // RUN: -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-translate -mlir-to-llvmir | \ // RUN: %lli --entry-function=entry --mattr="+amx-tile,+amx-int8,+amx-bf16" \ diff --git a/mlir/test/Target/Cpp/expressions.mlir b/mlir/test/Target/Cpp/expressions.mlir index 2eda58902cb1d..aaddd5af874a9 100644 --- a/mlir/test/Target/Cpp/expressions.mlir +++ b/mlir/test/Target/Cpp/expressions.mlir @@ -65,15 +65,15 @@ func.func @do_not_inline(%arg0: i32, %arg1: i32, %arg2 : i32) -> i32 { return %e : i32 } -// CPP-DEFAULT: float paranthesis_for_low_precedence(int32_t [[VAL_1:v[0-9]+]], int32_t [[VAL_2:v[0-9]+]], int32_t [[VAL_3:v[0-9]+]]) { +// CPP-DEFAULT: float parentheses_for_low_precedence(int32_t [[VAL_1:v[0-9]+]], int32_t [[VAL_2:v[0-9]+]], int32_t [[VAL_3:v[0-9]+]]) { // CPP-DEFAULT-NEXT: return (float) ([[VAL_1]] + [[VAL_2]] * [[VAL_3]]); // CPP-DEFAULT-NEXT: } -// CPP-DECLTOP: float paranthesis_for_low_precedence(int32_t [[VAL_1:v[0-9]+]], int32_t [[VAL_2:v[0-9]+]], int32_t [[VAL_3:v[0-9]+]]) { +// CPP-DECLTOP: float parentheses_for_low_precedence(int32_t [[VAL_1:v[0-9]+]], int32_t [[VAL_2:v[0-9]+]], int32_t [[VAL_3:v[0-9]+]]) { // CPP-DECLTOP-NEXT: return (float) ([[VAL_1]] + [[VAL_2]] * [[VAL_3]]); // 
CPP-DECLTOP-NEXT: } -func.func @paranthesis_for_low_precedence(%arg0: i32, %arg1: i32, %arg2: i32) -> f32 { +func.func @parentheses_for_low_precedence(%arg0: i32, %arg1: i32, %arg2: i32) -> f32 { %e = emitc.expression : f32 { %a = emitc.add %arg0, %arg1 : (i32, i32) -> i32 %b = emitc.mul %a, %arg2 : (i32, i32) -> i32 @@ -83,6 +83,23 @@ func.func @paranthesis_for_low_precedence(%arg0: i32, %arg1: i32, %arg2: i32) -> return %e : f32 } +// CPP-DEFAULT: int32_t parentheses_for_same_precedence(int32_t [[VAL_1:v[0-9]+]], int32_t [[VAL_2:v[0-9]+]], int32_t [[VAL_3:v[0-9]+]]) { +// CPP-DEFAULT-NEXT: return [[VAL_3]] / ([[VAL_1]] * [[VAL_2]]); +// CPP-DEFAULT-NEXT: } + +// CPP-DECLTOP: int32_t parentheses_for_same_precedence(int32_t [[VAL_1:v[0-9]+]], int32_t [[VAL_2:v[0-9]+]], int32_t [[VAL_3:v[0-9]+]]) { +// CPP-DECLTOP-NEXT: return [[VAL_3]] / ([[VAL_1]] * [[VAL_2]]); +// CPP-DECLTOP-NEXT: } +func.func @parentheses_for_same_precedence(%arg0: i32, %arg1: i32, %arg2: i32) -> i32 { + %e = emitc.expression : i32 { + %0 = emitc.mul %arg0, %arg1 : (i32, i32) -> i32 + %1 = emitc.div %arg2, %0 : (i32, i32) -> i32 + emitc.yield %1 : i32 + } + + return %e : i32 +} + // CPP-DEFAULT: int32_t multiple_uses(int32_t [[VAL_1:v[0-9]+]], int32_t [[VAL_2:v[0-9]+]], int32_t [[VAL_3:v[0-9]+]], int32_t [[VAL_4:v[0-9]+]]) { // CPP-DEFAULT-NEXT: bool [[VAL_5:v[0-9]+]] = bar([[VAL_1]] * [[VAL_2]], [[VAL_3]]) - [[VAL_4]] < [[VAL_2]]; // CPP-DEFAULT-NEXT: int32_t [[VAL_6:v[0-9]+]]; diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 18324482153a5..9d7e0a7928ab8 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -2750,7 +2750,7 @@ def TestGraphLoopOp : TEST_Op<"graph_loop", def InferIntRangeType : AnyTypeOf<[AnyInteger, Index]>; def TestWithBoundsOp : TEST_Op<"with_bounds", - [DeclareOpInterfaceMethods, + [DeclareOpInterfaceMethods, NoMemoryEffect]> { let arguments = (ins APIntAttr:$umin, APIntAttr:$umax, @@ 
-2762,7 +2762,7 @@ def TestWithBoundsOp : TEST_Op<"with_bounds", } def TestWithBoundsRegionOp : TEST_Op<"with_bounds_region", - [DeclareOpInterfaceMethods, + [DeclareOpInterfaceMethods, SingleBlock, NoTerminator]> { let arguments = (ins APIntAttr:$umin, APIntAttr:$umax, @@ -2774,7 +2774,7 @@ def TestWithBoundsRegionOp : TEST_Op<"with_bounds_region", } def TestIncrementOp : TEST_Op<"increment", - [DeclareOpInterfaceMethods, + [DeclareOpInterfaceMethods, NoMemoryEffect, AllTypesMatch<["value", "result"]>]> { let arguments = (ins InferIntRangeType:$value); let results = (outs InferIntRangeType:$result); @@ -2783,7 +2783,8 @@ def TestIncrementOp : TEST_Op<"increment", } def TestReflectBoundsOp : TEST_Op<"reflect_bounds", - [DeclareOpInterfaceMethods, AllTypesMatch<["value", "result"]>]> { + [DeclareOpInterfaceMethods, + AllTypesMatch<["value", "result"]>]> { let arguments = (ins InferIntRangeType:$value, OptionalAttr:$umin, OptionalAttr:$umax, diff --git a/mlir/test/python/dialects/scf.py b/mlir/test/python/dialects/scf.py index ee8d09aa301d9..95a6de86b670d 100644 --- a/mlir/test/python/dialects/scf.py +++ b/mlir/test/python/dialects/scf.py @@ -176,6 +176,56 @@ def range_loop_7(lb, ub, step, memref_v): memref.store(add, memref_v, [i]) scf.yield_([]) + # CHECK: func.func @loop_yield_1(%[[VAL_0:.*]]: index, %[[VAL_1:.*]]: index, %[[VAL_2:.*]]: index, %[[VAL_3:.*]]: memref<10xindex>) { + # CHECK: %[[VAL_4:.*]] = arith.constant 0 : index + # CHECK: %[[VAL_5:.*]] = arith.constant 0 : index + # CHECK: %[[VAL_6:.*]] = arith.constant 0 : index + # CHECK: %[[VAL_7:.*]] = arith.constant 100 : index + # CHECK: %[[VAL_8:.*]] = arith.constant 1 : index + # CHECK: %[[VAL_10:.*]] = scf.for %[[IV:.*]] = %[[VAL_6]] to %[[VAL_7]] step %[[VAL_8]] iter_args(%[[ITER:.*]] = %[[VAL_4]]) -> (index) { + # CHECK: %[[VAL_9:.*]] = arith.addi %[[ITER]], %[[IV]] : index + # CHECK: scf.yield %[[VAL_9]] : index + # CHECK: } + # CHECK: memref.store %[[VAL_10]], %[[VAL_3]]{{\[}}%[[VAL_5]]] : 
memref<10xindex> + # CHECK: return + # CHECK: } + @func.FuncOp.from_py_func(index_type, index_type, index_type, memref_t) + def loop_yield_1(lb, ub, step, memref_v): + sum = arith.ConstantOp.create_index(0) + c0 = arith.ConstantOp.create_index(0) + for i, loc_sum, sum in scf.for_(0, 100, 1, [sum]): + loc_sum = arith.addi(loc_sum, i) + scf.yield_([loc_sum]) + memref.store(sum, memref_v, [c0]) + + # CHECK: func.func @loop_yield_2(%[[VAL_0:.*]]: index, %[[VAL_1:.*]]: index, %[[VAL_2:.*]]: index, %[[VAL_3:.*]]: memref<10xindex>) { + # CHECK: %[[c0:.*]] = arith.constant 0 : index + # CHECK: %[[c2:.*]] = arith.constant 2 : index + # CHECK: %[[REF1:.*]] = arith.constant 0 : index + # CHECK: %[[REF2:.*]] = arith.constant 1 : index + # CHECK: %[[VAL_6:.*]] = arith.constant 0 : index + # CHECK: %[[VAL_7:.*]] = arith.constant 100 : index + # CHECK: %[[VAL_8:.*]] = arith.constant 1 : index + # CHECK: %[[RES:.*]] = scf.for %[[IV:.*]] = %[[VAL_6]] to %[[VAL_7]] step %[[VAL_8]] iter_args(%[[ITER1:.*]] = %[[c0]], %[[ITER2:.*]] = %[[c2]]) -> (index, index) { + # CHECK: %[[VAL_9:.*]] = arith.addi %[[ITER1]], %[[IV]] : index + # CHECK: %[[VAL_10:.*]] = arith.addi %[[ITER2]], %[[IV]] : index + # CHECK: scf.yield %[[VAL_9]], %[[VAL_10]] : index, index + # CHECK: } + # CHECK: return + # CHECK: } + @func.FuncOp.from_py_func(index_type, index_type, index_type, memref_t) + def loop_yield_2(lb, ub, step, memref_v): + sum1 = arith.ConstantOp.create_index(0) + sum2 = arith.ConstantOp.create_index(2) + c0 = arith.ConstantOp.create_index(0) + c1 = arith.ConstantOp.create_index(1) + for i, [loc_sum1, loc_sum2], [sum1, sum2] in scf.for_(0, 100, 1, [sum1, sum2]): + loc_sum1 = arith.addi(loc_sum1, i) + loc_sum2 = arith.addi(loc_sum2, i) + scf.yield_([loc_sum1, loc_sum2]) + memref.store(sum1, memref_v, [c0]) + memref.store(sum2, memref_v, [c1]) + @constructAndPrintInModule def testOpsAsArguments(): diff --git a/offload/src/PluginManager.cpp b/offload/src/PluginManager.cpp index 
f72007849e36e..13f08b142b876 100644 --- a/offload/src/PluginManager.cpp +++ b/offload/src/PluginManager.cpp @@ -155,11 +155,11 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) { // Initialize all the plugins that have associated images. for (auto &Plugin : Plugins) { - if (Plugin->is_initialized()) - continue; - // Extract the exectuable image and extra information if availible. for (int32_t i = 0; i < Desc->NumDeviceImages; ++i) { + if (Plugin->is_initialized()) + continue; + if (!Plugin->is_valid_binary(&Desc->DeviceImages[i], /*Initialized=*/false)) continue; diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 612d784be8a55..62c35c19e6b45 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -177,6 +177,7 @@ else() add_llvm_library(omp ${LIBOMP_LIBRARY_KIND} ${LIBOMP_SOURCE_FILES} PARTIAL_SOURCES_INTENDED LINK_LIBS ${LIBOMP_CONFIGURED_LIBFLAGS} ${LIBOMP_DL_LIBS} LINK_COMPONENTS Support + BUILDTREE_ONLY ) # libomp must be a C++ library such that it can link libLLVMSupport set(LIBOMP_LINKER_LANGUAGE CXX) diff --git a/openmp/tools/archer/ompt-tsan.cpp b/openmp/tools/archer/ompt-tsan.cpp index de77e25db2d39..d7658077e83ae 100644 --- a/openmp/tools/archer/ompt-tsan.cpp +++ b/openmp/tools/archer/ompt-tsan.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -29,7 +30,6 @@ #include #include #include -#include #include "omp-tools.h" @@ -146,18 +146,28 @@ void __attribute__((weak)) __tsan_flush_memory() {} static ArcherFlags *archer_flags; #ifndef TsanHappensBefore + +template static void __ompt_tsan_func(Args...) {} + +#define DECLARE_TSAN_FUNCTION(name, ...) \ + static void (*name)(__VA_ARGS__) = __ompt_tsan_func<__VA_ARGS__>; + // Thread Sanitizer is a tool that finds races in code. // See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations . // tsan detects these exact functions by name. 
extern "C" { -static void (*AnnotateHappensAfter)(const char *, int, const volatile void *); -static void (*AnnotateHappensBefore)(const char *, int, const volatile void *); -static void (*AnnotateIgnoreWritesBegin)(const char *, int); -static void (*AnnotateIgnoreWritesEnd)(const char *, int); -static void (*AnnotateNewMemory)(const char *, int, const volatile void *, - size_t); -static void (*__tsan_func_entry)(const void *); -static void (*__tsan_func_exit)(void); +DECLARE_TSAN_FUNCTION(AnnotateHappensAfter, const char *, int, + const volatile void *) +DECLARE_TSAN_FUNCTION(AnnotateHappensBefore, const char *, int, + const volatile void *) +DECLARE_TSAN_FUNCTION(AnnotateIgnoreWritesBegin, const char *, int) +DECLARE_TSAN_FUNCTION(AnnotateIgnoreWritesEnd, const char *, int) +DECLARE_TSAN_FUNCTION(AnnotateNewMemory, const char *, int, + const volatile void *, size_t) +DECLARE_TSAN_FUNCTION(__tsan_func_entry, const void *) +DECLARE_TSAN_FUNCTION(__tsan_func_exit) + +// RunningOnValgrind is used to detect absence of TSan and must intentionally be a nullptr. 
static int (*RunningOnValgrind)(void); } @@ -1142,7 +1152,10 @@ static void ompt_tsan_mutex_released(ompt_mutex_t kind, ompt_wait_id_t wait_id, #define findTsanFunction(f, fSig) \ do { \ - if (NULL == (f = fSig dlsym(RTLD_DEFAULT, #f))) \ + void *fp = dlsym(RTLD_DEFAULT, #f); \ + if (fp) \ + f = fSig fp; \ + else \ printf("Unable to find TSan function " #f ".\n"); \ } while (0) diff --git a/polly/include/polly/ScheduleTreeTransform.h b/polly/include/polly/ScheduleTreeTransform.h index ee504c4e5f524..6bd5a3abf9ea2 100644 --- a/polly/include/polly/ScheduleTreeTransform.h +++ b/polly/include/polly/ScheduleTreeTransform.h @@ -47,9 +47,9 @@ struct ScheduleTreeVisitor { return getDerived().visitSequence(Node.as(), std::forward(args)...); case isl_schedule_node_set: + assert(isl_schedule_node_n_children(Node.get()) >= 2); return getDerived().visitSet(Node.as(), std::forward(args)...); - assert(isl_schedule_node_n_children(Node.get()) >= 2); case isl_schedule_node_leaf: assert(isl_schedule_node_n_children(Node.get()) == 0); return getDerived().visitLeaf(Node.as(), diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 446499cf15d7b..70ec3a48a5e2e 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -127,6 +127,11 @@ libc_support_library( hdrs = ["hdr/time_macros.h"], ) +libc_support_library( + name = "hdr_float_macros", + hdrs = ["hdr/float_macros.h"], +) + ############################ Type Proxy Header Files ########################### libc_support_library( @@ -189,7 +194,7 @@ libc_support_library( ":__support_macros_properties_compiler", ":__support_macros_properties_cpu_features", ":__support_macros_properties_os", - ":llvm_libc_macros_float_macros", + ":hdr_float_macros", ":llvm_libc_types_float128", ], ) diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 
d1a2c6f11d98a..a67f20533ae22 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -2232,7 +2232,7 @@ llvm_target_lib_list = [lib for lib in [ ("-gen-callingconv", "lib/Target/X86/X86GenCallingConv.inc"), ("-gen-subtarget", "lib/Target/X86/X86GenSubtargetInfo.inc"), ("-gen-x86-fold-tables -asmwriternum=1", "lib/Target/X86/X86GenFoldTables.inc"), - ("-gen-x86-compress-evex-tables", "lib/Target/X86/X86GenCompressEVEXTables.inc"), + ("-gen-x86-instr-mapping", "lib/Target/X86/X86GenInstrMapping.inc"), ("-gen-exegesis", "lib/Target/X86/X86GenExegesis.inc"), ("-gen-x86-mnemonic-tables -asmwriternum=1", "lib/Target/X86/X86GenMnemonicTables.inc"), ], diff --git a/utils/bazel/llvm_configs/llvm-config.h.cmake b/utils/bazel/llvm_configs/llvm-config.h.cmake index 6605ea60df99e..629977cc11d68 100644 --- a/utils/bazel/llvm_configs/llvm-config.h.cmake +++ b/utils/bazel/llvm_configs/llvm-config.h.cmake @@ -198,4 +198,7 @@ /* Define if plugins enabled */ #cmakedefine LLVM_ENABLE_PLUGINS +/* Define if logf128 is available */ +#cmakedefine LLVM_HAS_LOGF128 + #endif