Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AArch64][SME2] Add SME2 MLA/MLS builtins. #75584

Merged
merged 7 commits into from Dec 21, 2023

Conversation

dtemirbulatov
Copy link
Contributor

Add SME2 MLA/MLS builtins.

@llvmbot llvmbot added clang Clang issues not falling into any other category clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:codegen labels Dec 15, 2023
@llvmbot
Copy link
Collaborator

llvmbot commented Dec 15, 2023

@llvm/pr-subscribers-clang-codegen

Author: Dinar Temirbulatov (dtemirbulatov)

Changes

Add SME2 MLA/MLS builtins.


Patch is 291.39 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/75584.diff

5 Files Affected:

  • (modified) clang/include/clang/Basic/arm_sme.td (+217)
  • (modified) clang/lib/CodeGen/CGBuiltin.cpp (+25)
  • (added) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlal.c (+760)
  • (added) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlall.c (+1950)
  • (added) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlsl.c (+760)
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index fcff6fe35b7ba3..40256544c7bbc6 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -315,6 +315,223 @@ let TargetGuard = "sme2" in {
   def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>;
 }
 
+// FMLA/FMLS
+let TargetGuard = "sme2" in {
+  def SVMLA_MULTI_VG1x2_F32 : Inst<"svmla_za32[_{d}]_vg1x2", "vm22", "f", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_MULTI_VG1x4_F32 : Inst<"svmla_za32[_{d}]_vg1x4", "vm44", "f", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x2_F32 : Inst<"svmls_za32[_{d}]_vg1x2", "vm22", "f", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x4_F32 : Inst<"svmls_za32[_{d}]_vg1x4", "vm44", "f", MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_SINGLE_VG1x2_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x2", "vm2d", "f", MergeNone, "aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_SINGLE_VG1x4_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x4", "vm4d", "f", MergeNone, "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x2_F32 : Inst<"svmls[_single]_za32[_{d}]_vg1x2", "vm2d", "f", MergeNone, "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x4_F32 : Inst<"svmls[_single]_za32[_{d}]_vg1x4", "vm4d", "f", MergeNone, "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_LANE_VG1x2_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x2", "vm2di", "f", MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLA_LANE_VG1x4_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x4", "vm4di", "f", MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLS_LANE_VG1x2_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x2", "vm2di", "f", MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLS_LANE_VG1x4_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x4", "vm4di", "f", MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
+}
+
+let TargetGuard = "sme2,sme-f64f64" in {
+  def SVMLA_MULTI_VG1x2_F64 : Inst<"svmla_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_MULTI_VG1x4_F64 : Inst<"svmla_za64[_{d}]_vg1x4", "vm44", "d", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x2_F64 : Inst<"svmls_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x4_F64 : Inst<"svmls_za64[_{d}]_vg1x4", "vm44", "d", MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_SINGLE_VG1x2_F64 : Inst<"svmla[_single]_za64[_{d}]_vg1x2", "vm2d", "d", MergeNone, "aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_SINGLE_VG1x4_F64 : Inst<"svmla[_single]_za64[_{d}]_vg1x4", "vm4d", "d", MergeNone, "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x2_F64 : Inst<"svmls[_single]_za64[_{d}]_vg1x2", "vm2d", "d", MergeNone, "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x4_F64 : Inst<"svmls[_single]_za64[_{d}]_vg1x4", "vm4d", "d", MergeNone, "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_LANE_VG1x2_F64 : Inst<"svmla_lane_za64[_{d}]_vg1x2", "vm2di", "d", MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_1>]>;
+  def SVMLA_LANE_VG1x4_F64 : Inst<"svmla_lane_za64[_{d}]_vg1x4", "vm4di", "d", MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_1>]>;
+  def SVMLS_LANE_VG1x2_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x2", "vm2di", "d", MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_1>]>;
+  def SVMLS_LANE_VG1x4_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x4", "vm4di", "d", MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_1>]>;
+}
+
+// FMLAL/FMLSL/UMLAL/SMLAL
+// SMLALL/UMLALL/USMLALL/SUMLALL
+let TargetGuard = "sme2" in {
+  // MULTI MLAL
+  def SVMLAL_MULTI_VG2x2_F16 : Inst<"svmla_za32[_{d}]_vg2x2", "vm22", "bh", MergeNone, "aarch64_sme_fmlal_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x4_F16 : Inst<"svmla_za32[_{d}]_vg2x4", "vm44", "bh", MergeNone, "aarch64_sme_fmlal_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x2_S16 : Inst<"svmla_za32[_{d}]_vg2x2", "vm22", "s", MergeNone, "aarch64_sme_smlal_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x4_S16 : Inst<"svmla_za32[_{d}]_vg2x4", "vm44", "s", MergeNone, "aarch64_sme_smlal_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x2_U16 : Inst<"svmla_za32[_{d}]_vg2x2", "vm22", "Us", MergeNone, "aarch64_sme_umlal_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x4_U16 : Inst<"svmla_za32[_{d}]_vg2x4", "vm44", "Us", MergeNone, "aarch64_sme_umlal_vg2x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLAL_MULTI_VG4x2_S8 : Inst<"svmla_za32[_{d}]_vg4x2", "vm22", "c", MergeNone, "aarch64_sme_smla_za32_vg4x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG4x2_U8 : Inst<"svmla_za32[_{d}]_vg4x2", "vm22", "Uc", MergeNone, "aarch64_sme_umla_za32_vg4x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG4x4_S8 : Inst<"svmla_za32[_{d}]_vg4x4", "vm44", "c", MergeNone, "aarch64_sme_smla_za32_vg4x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG4x4_U8 : Inst<"svmla_za32[_{d}]_vg4x4", "vm44", "Uc", MergeNone, "aarch64_sme_umla_za32_vg4x4", [IsStreaming, IsSharedZA], []>;
+
+  let TargetGuard = "sme-i16i64" in {
+    def SVMLAL_MULTI_VG4x2_S16 : Inst<"svmla_za64[_{d}]_vg4x2", "vm22", "s", MergeNone, "aarch64_sme_smla_za64_vg4x2", [IsStreaming, IsSharedZA], []>;
+    def SVMLAL_MULTI_VG4x2_U16 : Inst<"svmla_za64[_{d}]_vg4x2", "vm22", "Us", MergeNone, "aarch64_sme_umla_za64_vg4x2", [IsStreaming, IsSharedZA], []>;
+    def SVMLAL_MULTI_VG4x4_S16 : Inst<"svmla_za64[_{d}]_vg4x4", "vm44", "s", MergeNone, "aarch64_sme_smla_za64_vg4x4", [IsStreaming, IsSharedZA], []>;
+    def SVMLAL_MULTI_VG4x4_U16 : Inst<"svmla_za64[_{d}]_vg4x4", "vm44", "Us", MergeNone, "aarch64_sme_umla_za64_vg4x4", [IsStreaming, IsSharedZA], []>;
+  }
+
+  // MULTI MLSL
+  def SVMLSL_MULTI_VG2x2_F16 : Inst<"svmls_za32[_{d}]_vg2x2", "vm22", "bh", MergeNone, "aarch64_sme_fmlsl_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_MULTI_VG2x4_F16 : Inst<"svmls_za32[_{d}]_vg2x4", "vm44", "bh", MergeNone, "aarch64_sme_fmlsl_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_MULTI_VG2x2_S16 : Inst<"svmls_za32[_{d}]_vg2x2", "vm22", "s", MergeNone, "aarch64_sme_smlsl_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_MULTI_VG2x4_S16 : Inst<"svmls_za32[_{d}]_vg2x4", "vm44", "s", MergeNone, "aarch64_sme_smlsl_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_MULTI_VG2x2_U16 : Inst<"svmls_za32[_{d}]_vg2x2", "vm22", "Us", MergeNone, "aarch64_sme_umlsl_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_MULTI_VG2x4_U16 : Inst<"svmls_za32[_{d}]_vg2x4", "vm44", "Us", MergeNone, "aarch64_sme_umlsl_vg2x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLSL_MULTI_VG4x2_S8 : Inst<"svmls_za32[_{d}]_vg4x2", "vm22", "c", MergeNone, "aarch64_sme_smls_za32_vg4x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_MULTI_VG4x2_U8 : Inst<"svmls_za32[_{d}]_vg4x2", "vm22", "Uc", MergeNone, "aarch64_sme_umls_za32_vg4x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_MULTI_VG4x4_S8 : Inst<"svmls_za32[_{d}]_vg4x4", "vm44", "c", MergeNone, "aarch64_sme_smls_za32_vg4x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_MULTI_VG4x4_U8 : Inst<"svmls_za32[_{d}]_vg4x4", "vm44", "Uc", MergeNone, "aarch64_sme_umls_za32_vg4x4", [IsStreaming, IsSharedZA], []>;
+
+  let TargetGuard = "sme-i16i64" in {
+    def SVMLSL_MULTI_VG4x2_S16 : Inst<"svmls_za64[_{d}]_vg4x2", "vm22", "s", MergeNone, "aarch64_sme_smls_za64_vg4x2", [IsStreaming, IsSharedZA], []>;
+    def SVMLSL_MULTI_VG4x2_U16 : Inst<"svmls_za64[_{d}]_vg4x2", "vm22", "Us", MergeNone, "aarch64_sme_umls_za64_vg4x2", [IsStreaming, IsSharedZA], []>;
+    def SVMLSL_MULTI_VG4x4_S16 : Inst<"svmls_za64[_{d}]_vg4x4", "vm44", "s", MergeNone, "aarch64_sme_smls_za64_vg4x4", [IsStreaming, IsSharedZA], []>;
+    def SVMLSL_MULTI_VG4x4_U16 : Inst<"svmls_za64[_{d}]_vg4x4", "vm44", "Us", MergeNone, "aarch64_sme_umls_za64_vg4x4", [IsStreaming, IsSharedZA], []>;
+  }
+
+  // SINGLE MLAL
+  def SVMLAL_SINGLE_VG2x1_F16 : Inst<"svmla_za32[_{d}]_vg2x1",          "vmdd", "bh", MergeNone, "aarch64_sme_fmlal_single_vg2x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG2x2_F16 : Inst<"svmla[_single]_za32[_{d}]_vg2x2", "vm2d", "bh", MergeNone, "aarch64_sme_fmlal_single_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG2x4_F16 : Inst<"svmla[_single]_za32[_{d}]_vg2x4", "vm4d", "bh", MergeNone, "aarch64_sme_fmlal_single_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG2x1_S16 : Inst<"svmla_za32[_{d}]_vg2x1",          "vmdd", "s", MergeNone, "aarch64_sme_smlal_single_vg2x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG2x2_S16 : Inst<"svmla[_single]_za32[_{d}]_vg2x2", "vm2d", "s", MergeNone, "aarch64_sme_smlal_single_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG2x4_S16 : Inst<"svmla[_single]_za32[_{d}]_vg2x4", "vm4d", "s", MergeNone, "aarch64_sme_smlal_single_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG2x1_U16 : Inst<"svmla_za32[_{d}]_vg2x1",          "vmdd", "Us", MergeNone, "aarch64_sme_umlal_single_vg2x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG2x2_U16 : Inst<"svmla[_single]_za32[_{d}]_vg2x2", "vm2d", "Us", MergeNone, "aarch64_sme_umlal_single_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG2x4_U16 : Inst<"svmla[_single]_za32[_{d}]_vg2x4", "vm4d", "Us", MergeNone, "aarch64_sme_umlal_single_vg2x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLAL_SINGLE_VG4x1_S8  : Inst<"svmla_za32[_{d}]_vg4x1",          "vmdd", "c", MergeNone, "aarch64_sme_smla_za32_single_vg4x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG4x1_U8  : Inst<"svmla_za32[_{d}]_vg4x1",          "vmdd", "Uc", MergeNone, "aarch64_sme_umla_za32_single_vg4x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG4x2_S8  : Inst<"svmla[_single]_za32[_{d}]_vg4x2", "vm2d", "c", MergeNone, "aarch64_sme_smla_za32_single_vg4x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG4x2_U8  : Inst<"svmla[_single]_za32[_{d}]_vg4x2", "vm2d", "Uc", MergeNone, "aarch64_sme_umla_za32_single_vg4x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG4x4_S8  : Inst<"svmla[_single]_za32[_{d}]_vg4x4", "vm4d", "c", MergeNone, "aarch64_sme_smla_za32_single_vg4x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG4x4_U8  : Inst<"svmla[_single]_za32[_{d}]_vg4x4", "vm4d", "Uc", MergeNone, "aarch64_sme_umla_za32_single_vg4x4", [IsStreaming, IsSharedZA], []>;
+
+  let TargetGuard = "sme-i16i64" in {
+    def SVMLAL_SINGLE_VG4x1_S16 : Inst<"svmla_za64[_{d}]_vg4x1",          "vmdd", "s", MergeNone, "aarch64_sme_smla_za64_single_vg4x1", [IsStreaming, IsSharedZA], []>;
+    def SVMLAL_SINGLE_VG4x1_U16 : Inst<"svmla_za64[_{d}]_vg4x1",          "vmdd", "Us", MergeNone, "aarch64_sme_umla_za64_single_vg4x1", [IsStreaming, IsSharedZA], []>;
+    def SVMLAL_SINGLE_VG4x2_S16 : Inst<"svmla[_single]_za64[_{d}]_vg4x2", "vm2d", "s", MergeNone, "aarch64_sme_smla_za64_single_vg4x2", [IsStreaming, IsSharedZA], []>;
+    def SVMLAL_SINGLE_VG4x2_U16 : Inst<"svmla[_single]_za64[_{d}]_vg4x2", "vm2d", "Us", MergeNone, "aarch64_sme_umla_za64_single_vg4x2", [IsStreaming, IsSharedZA], []>;
+    def SVMLAL_SINGLE_VG4x4_S16 : Inst<"svmla[_single]_za64[_{d}]_vg4x4", "vm4d", "s", MergeNone, "aarch64_sme_smla_za64_single_vg4x4", [IsStreaming, IsSharedZA], []>;
+    def SVMLAL_SINGLE_VG4x4_U16 : Inst<"svmla[_single]_za64[_{d}]_vg4x4", "vm4d", "Us", MergeNone, "aarch64_sme_umla_za64_single_vg4x4", [IsStreaming, IsSharedZA], []>;
+  }
+
+  // SINGLE MLSL
+  def SVMLSL_SINGLE_VG2x1_F16 : Inst<"svmls_za32[_{d}]_vg2x1",          "vmdd", "bh", MergeNone, "aarch64_sme_fmlsl_single_vg2x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG2x2_F16 : Inst<"svmls[_single]_za32[_{d}]_vg2x2", "vm2d", "bh", MergeNone, "aarch64_sme_fmlsl_single_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG2x4_F16 : Inst<"svmls[_single]_za32[_{d}]_vg2x4", "vm4d", "bh", MergeNone, "aarch64_sme_fmlsl_single_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG2x1_S16 : Inst<"svmls_za32[_{d}]_vg2x1",          "vmdd", "s", MergeNone, "aarch64_sme_smlsl_single_vg2x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG2x2_S16 : Inst<"svmls[_single]_za32[_{d}]_vg2x2", "vm2d", "s", MergeNone, "aarch64_sme_smlsl_single_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG2x4_S16 : Inst<"svmls[_single]_za32[_{d}]_vg2x4", "vm4d", "s", MergeNone, "aarch64_sme_smlsl_single_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG2x1_U16 : Inst<"svmls_za32[_{d}]_vg2x1",          "vmdd", "Us", MergeNone, "aarch64_sme_umlsl_single_vg2x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG2x2_U16 : Inst<"svmls[_single]_za32[_{d}]_vg2x2", "vm2d", "Us", MergeNone, "aarch64_sme_umlsl_single_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG2x4_U16 : Inst<"svmls[_single]_za32[_{d}]_vg2x4", "vm4d", "Us", MergeNone, "aarch64_sme_umlsl_single_vg2x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLSL_SINGLE_VG4x1_S8  : Inst<"svmls_za32[_{d}]_vg4x1",          "vmdd", "c", MergeNone, "aarch64_sme_smls_za32_single_vg4x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG4x1_U8  : Inst<"svmls_za32[_{d}]_vg4x1",          "vmdd", "Uc", MergeNone, "aarch64_sme_umls_za32_single_vg4x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG4x2_S8  : Inst<"svmls[_single]_za32[_{d}]_vg4x2", "vm2d", "c", MergeNone, "aarch64_sme_smls_za32_single_vg4x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG4x2_U8  : Inst<"svmls[_single]_za32[_{d}]_vg4x2", "vm2d", "Uc", MergeNone, "aarch64_sme_umls_za32_single_vg4x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG4x4_S8  : Inst<"svmls[_single]_za32[_{d}]_vg4x4", "vm4d", "c", MergeNone, "aarch64_sme_smls_za32_single_vg4x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG4x4_U8  : Inst<"svmls[_single]_za32[_{d}]_vg4x4", "vm4d", "Uc", MergeNone, "aarch64_sme_umls_za32_single_vg4x4", [IsStreaming, IsSharedZA], []>;
+
+  let TargetGuard = "sme-i16i64" in {
+    def SVMLSL_SINGLE_VG4x1_S16 : Inst<"svmls_za64[_{d}]_vg4x1",          "vmdd", "s", MergeNone, "aarch64_sme_smls_za64_single_vg4x1", [IsStreaming, IsSharedZA], []>;
+    def SVMLSL_SINGLE_VG4x1_U16 : Inst<"svmls_za64[_{d}]_vg4x1",          "vmdd", "Us", MergeNone, "aarch64_sme_umls_za64_single_vg4x1", [IsStreaming, IsSharedZA], []>;
+    def SVMLSL_SINGLE_VG4x2_S16 : Inst<"svmls[_single]_za64[_{d}]_vg4x2", "vm2d", "s", MergeNone, "aarch64_sme_smls_za64_single_vg4x2", [IsStreaming, IsSharedZA], []>;
+    def SVMLSL_SINGLE_VG4x2_U16 : Inst<"svmls[_single]_za64[_{d}]_vg4x2", "vm2d", "Us", MergeNone, "aarch64_sme_umls_za64_single_vg4x2", [IsStreaming, IsSharedZA], []>;
+    def SVMLSL_SINGLE_VG4x4_S16 : Inst<"svmls[_single]_za64[_{d}]_vg4x4", "vm4d", "s", MergeNone, "aarch64_sme_smls_za64_single_vg4x4", [IsStreaming, IsSharedZA], []>;
+    def SVMLSL_SINGLE_VG4x4_U16 : Inst<"svmls[_single]_za64[_{d}]_vg4x4", "vm4d", "Us", MergeNone, "aarch64_sme_umls_za64_single_vg4x4", [IsStreaming, IsSharedZA], []>;
+  }
+
+  // INDEXED MLAL
+  def SVMLAL_LANE_VG2x1_F16 : Inst<"svmla_lane_za32[_{d}]_vg2x1", "vmddi", "bh", MergeNone, "aarch64_sme_fmlal_lane_vg2x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLAL_LANE_VG2x2_F16 : Inst<"svmla_lane_za32[_{d}]_vg2x2", "vm2di", "bh", MergeNone, "aarch64_sme_fmlal_lane_vg2x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLAL_LANE_VG2x4_F16 : Inst<"svmla_lane_za32[_{d}]_vg2x4", "vm4di", "bh", MergeNone, "aarch64_sme_fmlal_lane_vg2x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLAL_LANE_VG2x1_S16 : Inst<"svmla_lane_za32[_{d}]_vg2x1", "vmddi", "s", MergeNone, "aarch64_sme_smlal_lane_vg2x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLAL_LANE_VG2x2_S16 : Inst<"svmla_lane_za32[_{d}]_vg2x2", "vm2di", "s", MergeNone, "aarch64_sme_smlal_lane_vg2x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLAL_LANE_VG2x4_S16 : Inst<"svmla_lane_za32[_{d}]_vg2x4", "vm4di", "s", MergeNone, "aarch64_sme_smlal_lane_vg2x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLAL_LANE_VG2x1_U16 : Inst<"svmla_lane_za32[_{d}]_vg2x1", "vmddi", "Us", MergeNone, "aarch64_sme_umlal_lane_vg2x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLAL_LANE_VG2x2_U16 : Inst<"svmla_lane_za32[_{d}]_vg2x2", "vm2di", "Us", MergeNone, "aarch64_sme_umlal_lane_vg2x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLAL_LANE_VG2x4_U16 : Inst<"svmla_lane_za32[_{d}]_vg2x4", "vm4di", "Us", MergeNone, "aarch64_sme_umlal_lane_vg2x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+
+  def SVMLAL_LANE_VG4x1_S8 : Inst<"svmla_lane_za32[_{d}]_vg4x1", "vmddi", "c", MergeNone, "aarch64_sme_smla_za32_lane_vg4x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_15>]>;
+  def SVMLAL_LANE_VG4x1_U8 : Inst<"svmla_lane_za32[_{d}]_vg4x1", "vmddi", "Uc", MergeNone, "aarch64_sme_umla_za32_lane_vg4x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_15>]>;
+  def SVMLAL_LANE_VG4x2_S8 : Inst<"svmla_lane_za32[_{d}]_vg4x2", "vm2di", "c", MergeNone, "aarch64_sme_smla_za32_lane_vg4x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_15>]>;
+  def SVMLAL_LANE_VG4x2_U8 : Inst<"svmla_lane_za32[_{d}]_vg4x2", "vm2di", "Uc", MergeNone, "aarch64_sme_umla_za32_lane_vg4x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_15>]>;
+  def SVMLAL_LANE_VG4x4_S8 : Inst<"svmla_lane_za32[_{d}]_vg4x4", "vm4di", "c", MergeNone, "aarch64_sme_smla_za32_lane_vg4x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_15>]>;
+  def SVMLAL_LANE_VG4x4_U8 : Inst<"svmla_lane_za32[_{d}]_vg4x4", "vm4di", "Uc", MergeNone, "aarch64_sme_umla_za32_lane_vg4x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_15>]>;
+
+  let TargetGuard = "sme-i16i64" in {
+    def SVMLAL_LANE_VG4x1_S16 : Inst<"svmla_lane_za64[_{d}]_vg4x1", "vmddi", "s", MergeNone, "aarch64_sme_smla_za64_lane_vg4x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+    def SVMLAL_LANE_VG4x1_U16 : Inst<"svmla_lane_za64[_{d}]_vg4x1", "vmddi", "Us", MergeNone, "aarch64_sme_umla_za64_lane_vg4x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+    def SVMLAL_LANE_VG4x2_S16 : Inst<"svmla_lane_za64[_{d}]_vg4x2", "vm2di", "s", MergeNone, "aarch64_sme_smla_za64_lane_vg4x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+    def SVMLAL_LANE_VG4x2_U16 : Inst<"svmla_lane_za64[_{d}]_vg4x2", "vm2di", "Us", MergeNone, "aarch64_sme_umla_za64_lane_vg4x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+    def SVMLAL_LANE_VG4x4_S16 : Inst<"svmla_lane_za64[_{d}]_vg4x4", "vm4di", "s", MergeNone, "aarch64_sme_smla_za64_lane_vg4x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+    def SVMLAL_LANE_VG4x4_U16 : Inst<"svmla_lane_za64[_{d}]_vg4x4", "vm4di", "Us", MergeNone, "aarch64_sme_umla_za64_lane_vg4x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  }
+
+  // INDEXED MLSL
+  def SVMLSL_LANE_VG2x1_F16 : Inst<"svmls_lane_za32[_{d}]_vg2x1", "vmddi", "bh", MergeNone, "aarch64_sme_fmlsl_lane_vg2x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLSL_LANE_VG2x2_F16 : Inst<"svmls_lane_za32[_{d}]_vg2x2", "vm2di", "bh", MergeNone, "aarch64_sme_fmlsl_lane_vg2x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLSL_LANE_VG2x4_F16 : Inst<"svmls_lane_za32[_{d}]_vg2x4", "vm4di", "bh", MergeNone, "aarch64_sme_fmlsl_lane_vg2x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLSL_LANE_VG2x1_S16 : Inst<"svmls_lane_za32[_{d}]_vg2x1", ...
[truncated]

@llvmbot
Copy link
Collaborator

llvmbot commented Dec 15, 2023

@llvm/pr-subscribers-clang

Author: Dinar Temirbulatov (dtemirbulatov)

Changes

Add SME2 MLA/MLS builtins.


Patch is 291.39 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/75584.diff

5 Files Affected:

  • (modified) clang/include/clang/Basic/arm_sme.td (+217)
  • (modified) clang/lib/CodeGen/CGBuiltin.cpp (+25)
  • (added) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlal.c (+760)
  • (added) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlall.c (+1950)
  • (added) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlsl.c (+760)
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index fcff6fe35b7ba3..40256544c7bbc6 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -315,6 +315,223 @@ let TargetGuard = "sme2" in {
   def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>;
 }
 
+// FMLA/FMLS
+let TargetGuard = "sme2" in {
+  def SVMLA_MULTI_VG1x2_F32 : Inst<"svmla_za32[_{d}]_vg1x2", "vm22", "f", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_MULTI_VG1x4_F32 : Inst<"svmla_za32[_{d}]_vg1x4", "vm44", "f", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x2_F32 : Inst<"svmls_za32[_{d}]_vg1x2", "vm22", "f", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x4_F32 : Inst<"svmls_za32[_{d}]_vg1x4", "vm44", "f", MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_SINGLE_VG1x2_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x2", "vm2d", "f", MergeNone, "aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_SINGLE_VG1x4_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x4", "vm4d", "f", MergeNone, "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x2_F32 : Inst<"svmls[_single]_za32[_{d}]_vg1x2", "vm2d", "f", MergeNone, "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x4_F32 : Inst<"svmls[_single]_za32[_{d}]_vg1x4", "vm4d", "f", MergeNone, "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_LANE_VG1x2_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x2", "vm2di", "f", MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLA_LANE_VG1x4_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x4", "vm4di", "f", MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLS_LANE_VG1x2_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x2", "vm2di", "f", MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLS_LANE_VG1x4_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x4", "vm4di", "f", MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
+}
+
+let TargetGuard = "sme2,sme-f64f64" in {
+  def SVMLA_MULTI_VG1x2_F64 : Inst<"svmla_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_MULTI_VG1x4_F64 : Inst<"svmla_za64[_{d}]_vg1x4", "vm44", "d", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x2_F64 : Inst<"svmls_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x4_F64 : Inst<"svmls_za64[_{d}]_vg1x4", "vm44", "d", MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_SINGLE_VG1x2_F64 : Inst<"svmla[_single]_za64[_{d}]_vg1x2", "vm2d", "d", MergeNone, "aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_SINGLE_VG1x4_F64 : Inst<"svmla[_single]_za64[_{d}]_vg1x4", "vm4d", "d", MergeNone, "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x2_F64 : Inst<"svmls[_single]_za64[_{d}]_vg1x2", "vm2d", "d", MergeNone, "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x4_F64 : Inst<"svmls[_single]_za64[_{d}]_vg1x4", "vm4d", "d", MergeNone, "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_LANE_VG1x2_F64 : Inst<"svmla_lane_za64[_{d}]_vg1x2", "vm2di", "d", MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_1>]>;
+  def SVMLA_LANE_VG1x4_F64 : Inst<"svmla_lane_za64[_{d}]_vg1x4", "vm4di", "d", MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_1>]>;
+  def SVMLS_LANE_VG1x2_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x2", "vm2di", "d", MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_1>]>;
+  def SVMLS_LANE_VG1x4_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x4", "vm4di", "d", MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_1>]>;
+}
+
+// FMLAL/FMLSL/UMLAL/SMLAL
+// SMLALL/UMLALL/USMLALL/SUMLALL
+let TargetGuard = "sme2" in {
+  // MULTI MLAL
+  def SVMLAL_MULTI_VG2x2_F16 : Inst<"svmla_za32[_{d}]_vg2x2", "vm22", "bh", MergeNone, "aarch64_sme_fmlal_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x4_F16 : Inst<"svmla_za32[_{d}]_vg2x4", "vm44", "bh", MergeNone, "aarch64_sme_fmlal_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x2_S16 : Inst<"svmla_za32[_{d}]_vg2x2", "vm22", "s", MergeNone, "aarch64_sme_smlal_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x4_S16 : Inst<"svmla_za32[_{d}]_vg2x4", "vm44", "s", MergeNone, "aarch64_sme_smlal_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x2_U16 : Inst<"svmla_za32[_{d}]_vg2x2", "vm22", "Us", MergeNone, "aarch64_sme_umlal_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x4_U16 : Inst<"svmla_za32[_{d}]_vg2x4", "vm44", "Us", MergeNone, "aarch64_sme_umlal_vg2x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLAL_MULTI_VG4x2_S8 : Inst<"svmla_za32[_{d}]_vg4x2", "vm22", "c", MergeNone, "aarch64_sme_smla_za32_vg4x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG4x2_U8 : Inst<"svmla_za32[_{d}]_vg4x2", "vm22", "Uc", MergeNone, "aarch64_sme_umla_za32_vg4x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG4x4_S8 : Inst<"svmla_za32[_{d}]_vg4x4", "vm44", "c", MergeNone, "aarch64_sme_smla_za32_vg4x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG4x4_U8 : Inst<"svmla_za32[_{d}]_vg4x4", "vm44", "Uc", MergeNone, "aarch64_sme_umla_za32_vg4x4", [IsStreaming, IsSharedZA], []>;
+
+  let TargetGuard = "sme-i16i64" in {
+    def SVMLAL_MULTI_VG4x2_S16 : Inst<"svmla_za64[_{d}]_vg4x2", "vm22", "s", MergeNone, "aarch64_sme_smla_za64_vg4x2", [IsStreaming, IsSharedZA], []>;
+    def SVMLAL_MULTI_VG4x2_U16 : Inst<"svmla_za64[_{d}]_vg4x2", "vm22", "Us", MergeNone, "aarch64_sme_umla_za64_vg4x2", [IsStreaming, IsSharedZA], []>;
+    def SVMLAL_MULTI_VG4x4_S16 : Inst<"svmla_za64[_{d}]_vg4x4", "vm44", "s", MergeNone, "aarch64_sme_smla_za64_vg4x4", [IsStreaming, IsSharedZA], []>;
+    def SVMLAL_MULTI_VG4x4_U16 : Inst<"svmla_za64[_{d}]_vg4x4", "vm44", "Us", MergeNone, "aarch64_sme_umla_za64_vg4x4", [IsStreaming, IsSharedZA], []>;
+  }
+
+  // MULTI MLSL
+  def SVMLSL_MULTI_VG2x2_F16 : Inst<"svmls_za32[_{d}]_vg2x2", "vm22", "bh", MergeNone, "aarch64_sme_fmlsl_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_MULTI_VG2x4_F16 : Inst<"svmls_za32[_{d}]_vg2x4", "vm44", "bh", MergeNone, "aarch64_sme_fmlsl_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_MULTI_VG2x2_S16 : Inst<"svmls_za32[_{d}]_vg2x2", "vm22", "s", MergeNone, "aarch64_sme_smlsl_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_MULTI_VG2x4_S16 : Inst<"svmls_za32[_{d}]_vg2x4", "vm44", "s", MergeNone, "aarch64_sme_smlsl_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_MULTI_VG2x2_U16 : Inst<"svmls_za32[_{d}]_vg2x2", "vm22", "Us", MergeNone, "aarch64_sme_umlsl_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_MULTI_VG2x4_U16 : Inst<"svmls_za32[_{d}]_vg2x4", "vm44", "Us", MergeNone, "aarch64_sme_umlsl_vg2x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLSL_MULTI_VG4x2_S8 : Inst<"svmls_za32[_{d}]_vg4x2", "vm22", "c", MergeNone, "aarch64_sme_smls_za32_vg4x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_MULTI_VG4x2_U8 : Inst<"svmls_za32[_{d}]_vg4x2", "vm22", "Uc", MergeNone, "aarch64_sme_umls_za32_vg4x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_MULTI_VG4x4_S8 : Inst<"svmls_za32[_{d}]_vg4x4", "vm44", "c", MergeNone, "aarch64_sme_smls_za32_vg4x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_MULTI_VG4x4_U8 : Inst<"svmls_za32[_{d}]_vg4x4", "vm44", "Uc", MergeNone, "aarch64_sme_umls_za32_vg4x4", [IsStreaming, IsSharedZA], []>;
+
+  let TargetGuard = "sme-i16i64" in {
+    def SVMLSL_MULTI_VG4x2_S16 : Inst<"svmls_za64[_{d}]_vg4x2", "vm22", "s", MergeNone, "aarch64_sme_smls_za64_vg4x2", [IsStreaming, IsSharedZA], []>;
+    def SVMLSL_MULTI_VG4x2_U16 : Inst<"svmls_za64[_{d}]_vg4x2", "vm22", "Us", MergeNone, "aarch64_sme_umls_za64_vg4x2", [IsStreaming, IsSharedZA], []>;
+    def SVMLSL_MULTI_VG4x4_S16 : Inst<"svmls_za64[_{d}]_vg4x4", "vm44", "s", MergeNone, "aarch64_sme_smls_za64_vg4x4", [IsStreaming, IsSharedZA], []>;
+    def SVMLSL_MULTI_VG4x4_U16 : Inst<"svmls_za64[_{d}]_vg4x4", "vm44", "Us", MergeNone, "aarch64_sme_umls_za64_vg4x4", [IsStreaming, IsSharedZA], []>;
+  }
+
+  // SINGLE MLAL
+  def SVMLAL_SINGLE_VG2x1_F16 : Inst<"svmla_za32[_{d}]_vg2x1",          "vmdd", "bh", MergeNone, "aarch64_sme_fmlal_single_vg2x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG2x2_F16 : Inst<"svmla[_single]_za32[_{d}]_vg2x2", "vm2d", "bh", MergeNone, "aarch64_sme_fmlal_single_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG2x4_F16 : Inst<"svmla[_single]_za32[_{d}]_vg2x4", "vm4d", "bh", MergeNone, "aarch64_sme_fmlal_single_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG2x1_S16 : Inst<"svmla_za32[_{d}]_vg2x1",          "vmdd", "s", MergeNone, "aarch64_sme_smlal_single_vg2x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG2x2_S16 : Inst<"svmla[_single]_za32[_{d}]_vg2x2", "vm2d", "s", MergeNone, "aarch64_sme_smlal_single_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG2x4_S16 : Inst<"svmla[_single]_za32[_{d}]_vg2x4", "vm4d", "s", MergeNone, "aarch64_sme_smlal_single_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG2x1_U16 : Inst<"svmla_za32[_{d}]_vg2x1",          "vmdd", "Us", MergeNone, "aarch64_sme_umlal_single_vg2x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG2x2_U16 : Inst<"svmla[_single]_za32[_{d}]_vg2x2", "vm2d", "Us", MergeNone, "aarch64_sme_umlal_single_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG2x4_U16 : Inst<"svmla[_single]_za32[_{d}]_vg2x4", "vm4d", "Us", MergeNone, "aarch64_sme_umlal_single_vg2x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLAL_SINGLE_VG4x1_S8  : Inst<"svmla_za32[_{d}]_vg4x1",          "vmdd", "c", MergeNone, "aarch64_sme_smla_za32_single_vg4x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG4x1_U8  : Inst<"svmla_za32[_{d}]_vg4x1",          "vmdd", "Uc", MergeNone, "aarch64_sme_umla_za32_single_vg4x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG4x2_S8  : Inst<"svmla[_single]_za32[_{d}]_vg4x2", "vm2d", "c", MergeNone, "aarch64_sme_smla_za32_single_vg4x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG4x2_U8  : Inst<"svmla[_single]_za32[_{d}]_vg4x2", "vm2d", "Uc", MergeNone, "aarch64_sme_umla_za32_single_vg4x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG4x4_S8  : Inst<"svmla[_single]_za32[_{d}]_vg4x4", "vm4d", "c", MergeNone, "aarch64_sme_smla_za32_single_vg4x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_SINGLE_VG4x4_U8  : Inst<"svmla[_single]_za32[_{d}]_vg4x4", "vm4d", "Uc", MergeNone, "aarch64_sme_umla_za32_single_vg4x4", [IsStreaming, IsSharedZA], []>;
+
+  let TargetGuard = "sme-i16i64" in {
+    def SVMLAL_SINGLE_VG4x1_S16 : Inst<"svmla_za64[_{d}]_vg4x1",          "vmdd", "s", MergeNone, "aarch64_sme_smla_za64_single_vg4x1", [IsStreaming, IsSharedZA], []>;
+    def SVMLAL_SINGLE_VG4x1_U16 : Inst<"svmla_za64[_{d}]_vg4x1",          "vmdd", "Us", MergeNone, "aarch64_sme_umla_za64_single_vg4x1", [IsStreaming, IsSharedZA], []>;
+    def SVMLAL_SINGLE_VG4x2_S16 : Inst<"svmla[_single]_za64[_{d}]_vg4x2", "vm2d", "s", MergeNone, "aarch64_sme_smla_za64_single_vg4x2", [IsStreaming, IsSharedZA], []>;
+    def SVMLAL_SINGLE_VG4x2_U16 : Inst<"svmla[_single]_za64[_{d}]_vg4x2", "vm2d", "Us", MergeNone, "aarch64_sme_umla_za64_single_vg4x2", [IsStreaming, IsSharedZA], []>;
+    def SVMLAL_SINGLE_VG4x4_S16 : Inst<"svmla[_single]_za64[_{d}]_vg4x4", "vm4d", "s", MergeNone, "aarch64_sme_smla_za64_single_vg4x4", [IsStreaming, IsSharedZA], []>;
+    def SVMLAL_SINGLE_VG4x4_U16 : Inst<"svmla[_single]_za64[_{d}]_vg4x4", "vm4d", "Us", MergeNone, "aarch64_sme_umla_za64_single_vg4x4", [IsStreaming, IsSharedZA], []>;
+  }
+
+  // SINGLE MLSL
+  def SVMLSL_SINGLE_VG2x1_F16 : Inst<"svmls_za32[_{d}]_vg2x1",          "vmdd", "bh", MergeNone, "aarch64_sme_fmlsl_single_vg2x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG2x2_F16 : Inst<"svmls[_single]_za32[_{d}]_vg2x2", "vm2d", "bh", MergeNone, "aarch64_sme_fmlsl_single_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG2x4_F16 : Inst<"svmls[_single]_za32[_{d}]_vg2x4", "vm4d", "bh", MergeNone, "aarch64_sme_fmlsl_single_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG2x1_S16 : Inst<"svmls_za32[_{d}]_vg2x1",          "vmdd", "s", MergeNone, "aarch64_sme_smlsl_single_vg2x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG2x2_S16 : Inst<"svmls[_single]_za32[_{d}]_vg2x2", "vm2d", "s", MergeNone, "aarch64_sme_smlsl_single_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG2x4_S16 : Inst<"svmls[_single]_za32[_{d}]_vg2x4", "vm4d", "s", MergeNone, "aarch64_sme_smlsl_single_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG2x1_U16 : Inst<"svmls_za32[_{d}]_vg2x1",          "vmdd", "Us", MergeNone, "aarch64_sme_umlsl_single_vg2x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG2x2_U16 : Inst<"svmls[_single]_za32[_{d}]_vg2x2", "vm2d", "Us", MergeNone, "aarch64_sme_umlsl_single_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG2x4_U16 : Inst<"svmls[_single]_za32[_{d}]_vg2x4", "vm4d", "Us", MergeNone, "aarch64_sme_umlsl_single_vg2x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLSL_SINGLE_VG4x1_S8  : Inst<"svmls_za32[_{d}]_vg4x1",          "vmdd", "c", MergeNone, "aarch64_sme_smls_za32_single_vg4x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG4x1_U8  : Inst<"svmls_za32[_{d}]_vg4x1",          "vmdd", "Uc", MergeNone, "aarch64_sme_umls_za32_single_vg4x1", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG4x2_S8  : Inst<"svmls[_single]_za32[_{d}]_vg4x2", "vm2d", "c", MergeNone, "aarch64_sme_smls_za32_single_vg4x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG4x2_U8  : Inst<"svmls[_single]_za32[_{d}]_vg4x2", "vm2d", "Uc", MergeNone, "aarch64_sme_umls_za32_single_vg4x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG4x4_S8  : Inst<"svmls[_single]_za32[_{d}]_vg4x4", "vm4d", "c", MergeNone, "aarch64_sme_smls_za32_single_vg4x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLSL_SINGLE_VG4x4_U8  : Inst<"svmls[_single]_za32[_{d}]_vg4x4", "vm4d", "Uc", MergeNone, "aarch64_sme_umls_za32_single_vg4x4", [IsStreaming, IsSharedZA], []>;
+
+  let TargetGuard = "sme-i16i64" in {
+    def SVMLSL_SINGLE_VG4x1_S16 : Inst<"svmls_za64[_{d}]_vg4x1",          "vmdd", "s", MergeNone, "aarch64_sme_smls_za64_single_vg4x1", [IsStreaming, IsSharedZA], []>;
+    def SVMLSL_SINGLE_VG4x1_U16 : Inst<"svmls_za64[_{d}]_vg4x1",          "vmdd", "Us", MergeNone, "aarch64_sme_umls_za64_single_vg4x1", [IsStreaming, IsSharedZA], []>;
+    def SVMLSL_SINGLE_VG4x2_S16 : Inst<"svmls[_single]_za64[_{d}]_vg4x2", "vm2d", "s", MergeNone, "aarch64_sme_smls_za64_single_vg4x2", [IsStreaming, IsSharedZA], []>;
+    def SVMLSL_SINGLE_VG4x2_U16 : Inst<"svmls[_single]_za64[_{d}]_vg4x2", "vm2d", "Us", MergeNone, "aarch64_sme_umls_za64_single_vg4x2", [IsStreaming, IsSharedZA], []>;
+    def SVMLSL_SINGLE_VG4x4_S16 : Inst<"svmls[_single]_za64[_{d}]_vg4x4", "vm4d", "s", MergeNone, "aarch64_sme_smls_za64_single_vg4x4", [IsStreaming, IsSharedZA], []>;
+    def SVMLSL_SINGLE_VG4x4_U16 : Inst<"svmls[_single]_za64[_{d}]_vg4x4", "vm4d", "Us", MergeNone, "aarch64_sme_umls_za64_single_vg4x4", [IsStreaming, IsSharedZA], []>;
+  }
+
+  // INDEXED MLAL
+  def SVMLAL_LANE_VG2x1_F16 : Inst<"svmla_lane_za32[_{d}]_vg2x1", "vmddi", "bh", MergeNone, "aarch64_sme_fmlal_lane_vg2x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLAL_LANE_VG2x2_F16 : Inst<"svmla_lane_za32[_{d}]_vg2x2", "vm2di", "bh", MergeNone, "aarch64_sme_fmlal_lane_vg2x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLAL_LANE_VG2x4_F16 : Inst<"svmla_lane_za32[_{d}]_vg2x4", "vm4di", "bh", MergeNone, "aarch64_sme_fmlal_lane_vg2x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLAL_LANE_VG2x1_S16 : Inst<"svmla_lane_za32[_{d}]_vg2x1", "vmddi", "s", MergeNone, "aarch64_sme_smlal_lane_vg2x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLAL_LANE_VG2x2_S16 : Inst<"svmla_lane_za32[_{d}]_vg2x2", "vm2di", "s", MergeNone, "aarch64_sme_smlal_lane_vg2x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLAL_LANE_VG2x4_S16 : Inst<"svmla_lane_za32[_{d}]_vg2x4", "vm4di", "s", MergeNone, "aarch64_sme_smlal_lane_vg2x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLAL_LANE_VG2x1_U16 : Inst<"svmla_lane_za32[_{d}]_vg2x1", "vmddi", "Us", MergeNone, "aarch64_sme_umlal_lane_vg2x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLAL_LANE_VG2x2_U16 : Inst<"svmla_lane_za32[_{d}]_vg2x2", "vm2di", "Us", MergeNone, "aarch64_sme_umlal_lane_vg2x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLAL_LANE_VG2x4_U16 : Inst<"svmla_lane_za32[_{d}]_vg2x4", "vm4di", "Us", MergeNone, "aarch64_sme_umlal_lane_vg2x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+
+  def SVMLAL_LANE_VG4x1_S8 : Inst<"svmla_lane_za32[_{d}]_vg4x1", "vmddi", "c", MergeNone, "aarch64_sme_smla_za32_lane_vg4x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_15>]>;
+  def SVMLAL_LANE_VG4x1_U8 : Inst<"svmla_lane_za32[_{d}]_vg4x1", "vmddi", "Uc", MergeNone, "aarch64_sme_umla_za32_lane_vg4x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_15>]>;
+  def SVMLAL_LANE_VG4x2_S8 : Inst<"svmla_lane_za32[_{d}]_vg4x2", "vm2di", "c", MergeNone, "aarch64_sme_smla_za32_lane_vg4x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_15>]>;
+  def SVMLAL_LANE_VG4x2_U8 : Inst<"svmla_lane_za32[_{d}]_vg4x2", "vm2di", "Uc", MergeNone, "aarch64_sme_umla_za32_lane_vg4x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_15>]>;
+  def SVMLAL_LANE_VG4x4_S8 : Inst<"svmla_lane_za32[_{d}]_vg4x4", "vm4di", "c", MergeNone, "aarch64_sme_smla_za32_lane_vg4x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_15>]>;
+  def SVMLAL_LANE_VG4x4_U8 : Inst<"svmla_lane_za32[_{d}]_vg4x4", "vm4di", "Uc", MergeNone, "aarch64_sme_umla_za32_lane_vg4x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_15>]>;
+
+  let TargetGuard = "sme-i16i64" in {
+    def SVMLAL_LANE_VG4x1_S16 : Inst<"svmla_lane_za64[_{d}]_vg4x1", "vmddi", "s", MergeNone, "aarch64_sme_smla_za64_lane_vg4x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+    def SVMLAL_LANE_VG4x1_U16 : Inst<"svmla_lane_za64[_{d}]_vg4x1", "vmddi", "Us", MergeNone, "aarch64_sme_umla_za64_lane_vg4x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+    def SVMLAL_LANE_VG4x2_S16 : Inst<"svmla_lane_za64[_{d}]_vg4x2", "vm2di", "s", MergeNone, "aarch64_sme_smla_za64_lane_vg4x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+    def SVMLAL_LANE_VG4x2_U16 : Inst<"svmla_lane_za64[_{d}]_vg4x2", "vm2di", "Us", MergeNone, "aarch64_sme_umla_za64_lane_vg4x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+    def SVMLAL_LANE_VG4x4_S16 : Inst<"svmla_lane_za64[_{d}]_vg4x4", "vm4di", "s", MergeNone, "aarch64_sme_smla_za64_lane_vg4x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+    def SVMLAL_LANE_VG4x4_U16 : Inst<"svmla_lane_za64[_{d}]_vg4x4", "vm4di", "Us", MergeNone, "aarch64_sme_umla_za64_lane_vg4x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  }
+
+  // INDEXED MLSL
+  def SVMLSL_LANE_VG2x1_F16 : Inst<"svmls_lane_za32[_{d}]_vg2x1", "vmddi", "bh", MergeNone, "aarch64_sme_fmlsl_lane_vg2x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLSL_LANE_VG2x2_F16 : Inst<"svmls_lane_za32[_{d}]_vg2x2", "vm2di", "bh", MergeNone, "aarch64_sme_fmlsl_lane_vg2x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLSL_LANE_VG2x4_F16 : Inst<"svmls_lane_za32[_{d}]_vg2x4", "vm4di", "bh", MergeNone, "aarch64_sme_fmlsl_lane_vg2x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVMLSL_LANE_VG2x1_S16 : Inst<"svmls_lane_za32[_{d}]_vg2x1", ...
[truncated]

Copy link

github-actions bot commented Dec 15, 2023

✅ With the latest revision this PR passed the C/C++ code formatter.

Copy link
Collaborator

@SamTebbs33 SamTebbs33 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, thanks!

Comment on lines +320 to +361
def SVMLA_MULTI_VG1x2_F32 : Inst<"svmla_za32[_{d}]_vg1x2", "vm22", "f", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>;
def SVMLA_MULTI_VG1x4_F32 : Inst<"svmla_za32[_{d}]_vg1x4", "vm44", "f", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>;
def SVMLS_MULTI_VG1x2_F32 : Inst<"svmls_za32[_{d}]_vg1x2", "vm22", "f", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>;
def SVMLS_MULTI_VG1x4_F32 : Inst<"svmls_za32[_{d}]_vg1x4", "vm44", "f", MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These builtins are not covered by any of the tests in this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, I see for SVMLA_MULTI_VG1x2_F32 in

void test_svmla2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32x2_t zm) __arm_streaming __arm_shared_za {
  SVE_ACLE_FUNC(svmla,,_za32,_f32,_vg1x2)(slice_base, zn, zm);
} 

in acle_sme2_mla.c
and the rest of acle_sme2_mla.c, acle_sme2_mls.c

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps I searched for svmla_za32 rather than svmla.*_za32. It kind of shows that the macro is not used correctly, as it should have been SVE_ACLE_FUNC(svmla_za32,_f32,_vg1x2,)

Comment on lines +330 to +371
def SVMLA_LANE_VG1x2_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x2", "vm2di", "f", MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
def SVMLA_LANE_VG1x4_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x4", "vm4di", "f", MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
def SVMLS_LANE_VG1x2_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x2", "vm2di", "f", MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
def SVMLS_LANE_VG1x4_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x4", "vm4di", "f", MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These builtins are not covered by any of the tests in this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

similarly in acle_sme2_mla.c, acle_sme2_mls.c with test_svmla_lane2_f32(), test_svmla_lane4_f32(), test_svmls_lane2_f32(), test_svmls_lane4_f32() functions.

Comment on lines +337 to +379
def SVMLA_MULTI_VG1x2_F64 : Inst<"svmla_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>;
def SVMLA_MULTI_VG1x4_F64 : Inst<"svmla_za64[_{d}]_vg1x4", "vm44", "d", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>;
def SVMLS_MULTI_VG1x2_F64 : Inst<"svmls_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>;
def SVMLS_MULTI_VG1x4_F64 : Inst<"svmls_za64[_{d}]_vg1x4", "vm44", "d", MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>;

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These builtins are not covered by any of the tests in this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in acle_sme2_mla.c following functions test_svmla2_f64(), test_svmla4_f64() and in acle_sme2_mls.c test_svmls2_f64(), test_svmls4_f64() functions.

Comment on lines +347 to +388
def SVMLA_LANE_VG1x2_F64 : Inst<"svmla_lane_za64[_{d}]_vg1x2", "vm2di", "d", MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_1>]>;
def SVMLA_LANE_VG1x4_F64 : Inst<"svmla_lane_za64[_{d}]_vg1x4", "vm4di", "d", MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_1>]>;
def SVMLS_LANE_VG1x2_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x2", "vm2di", "d", MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_1>]>;
def SVMLS_LANE_VG1x4_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x4", "vm4di", "d", MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_1>]>;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These builtins are not covered by any of the tests in this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, acle_sme2_mla.c, acle_sme2_mls.c in test_svmla_lane2_f64(), test_svmla_lane4_f64(), , test_svmls_lane2_f64, test_svmla_lane4_f64(),

Comment on lines +419 to +465
def SVMLAL_LANE_VG2x1_F16 : Inst<"svmla_lane_za32[_{d}]_vg2x1", "vmddi", "bh", MergeNone, "aarch64_sme_fmlal_lane_vg2x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
def SVMLAL_LANE_VG2x2_F16 : Inst<"svmla_lane_za32[_{d}]_vg2x2", "vm2di", "bh", MergeNone, "aarch64_sme_fmlal_lane_vg2x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
def SVMLAL_LANE_VG2x4_F16 : Inst<"svmla_lane_za32[_{d}]_vg2x4", "vm4di", "bh", MergeNone, "aarch64_sme_fmlal_lane_vg2x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
def SVMLAL_LANE_VG2x1_S16 : Inst<"svmla_lane_za32[_{d}]_vg2x1", "vmddi", "s", MergeNone, "aarch64_sme_smlal_lane_vg2x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
def SVMLAL_LANE_VG2x2_S16 : Inst<"svmla_lane_za32[_{d}]_vg2x2", "vm2di", "s", MergeNone, "aarch64_sme_smlal_lane_vg2x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
def SVMLAL_LANE_VG2x4_S16 : Inst<"svmla_lane_za32[_{d}]_vg2x4", "vm4di", "s", MergeNone, "aarch64_sme_smlal_lane_vg2x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
def SVMLAL_LANE_VG2x1_U16 : Inst<"svmla_lane_za32[_{d}]_vg2x1", "vmddi", "Us", MergeNone, "aarch64_sme_umlal_lane_vg2x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
def SVMLAL_LANE_VG2x2_U16 : Inst<"svmla_lane_za32[_{d}]_vg2x2", "vm2di", "Us", MergeNone, "aarch64_sme_umlal_lane_vg2x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
def SVMLAL_LANE_VG2x4_U16 : Inst<"svmla_lane_za32[_{d}]_vg2x4", "vm4di", "Us", MergeNone, "aarch64_sme_umlal_lane_vg2x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These builtins are not covered by any of the tests in this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in acle_sme2_mlal.c with those functions test_svmla_lane1_f16(), test_svmla_lane2_f16(), test_svmla4_f16(), test_svmla_lane1_s16(), test_svmla_lane2_s16, test_svmla_lane4_s16, test_svmla_lane1_u16(), test_svmla_lane2_u16, test_svmla_lane4_u16

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good to know there are tests for them! As I replied above, these tests are not using the macro correctly, hence why the grep command didn't show them. Could you please update the tests so that they do?

I think that if the patch was more appropriately sized this would have been more apparent and I would not have needed to grep my way through the tests.

Comment on lines +437 to +483
def SVMLSL_LANE_VG2x1_F16 : Inst<"svmls_lane_za32[_{d}]_vg2x1", "vmddi", "bh", MergeNone, "aarch64_sme_fmlsl_lane_vg2x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
def SVMLSL_LANE_VG2x2_F16 : Inst<"svmls_lane_za32[_{d}]_vg2x2", "vm2di", "bh", MergeNone, "aarch64_sme_fmlsl_lane_vg2x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
def SVMLSL_LANE_VG2x4_F16 : Inst<"svmls_lane_za32[_{d}]_vg2x4", "vm4di", "bh", MergeNone, "aarch64_sme_fmlsl_lane_vg2x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
def SVMLSL_LANE_VG2x1_S16 : Inst<"svmls_lane_za32[_{d}]_vg2x1", "vmddi", "s", MergeNone, "aarch64_sme_smlsl_lane_vg2x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
def SVMLSL_LANE_VG2x2_S16 : Inst<"svmls_lane_za32[_{d}]_vg2x2", "vm2di", "s", MergeNone, "aarch64_sme_smlsl_lane_vg2x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
def SVMLSL_LANE_VG2x4_S16 : Inst<"svmls_lane_za32[_{d}]_vg2x4", "vm4di", "s", MergeNone, "aarch64_sme_smlsl_lane_vg2x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
def SVMLSL_LANE_VG2x1_U16 : Inst<"svmls_lane_za32[_{d}]_vg2x1", "vmddi", "Us", MergeNone, "aarch64_sme_umlsl_lane_vg2x1", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
def SVMLSL_LANE_VG2x2_U16 : Inst<"svmls_lane_za32[_{d}]_vg2x2", "vm2di", "Us", MergeNone, "aarch64_sme_umlsl_lane_vg2x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
def SVMLSL_LANE_VG2x4_U16 : Inst<"svmls_lane_za32[_{d}]_vg2x4", "vm4di", "Us", MergeNone, "aarch64_sme_umlsl_lane_vg2x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_7>]>;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These builtins are not covered by any of the tests in this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in acle_sme2_mlsl.c following functions: test_svmls_lane1_f16(), test_svmls_lane2_f16(), test_svmls4_f16(), test_svmls_lane1_s16(), test_svmls_lane2_s16, test_svmls_lane4_s16, test_svmls_lane1_u16(), test_svmls_lane2_u16, test_svmls_lane4_u16

@dtemirbulatov dtemirbulatov merged commit 77c5c44 into llvm:main Dec 21, 2023
3 of 4 checks passed
@dtemirbulatov dtemirbulatov deleted the sme2-mla-mls branch December 21, 2023 16:42
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
clang:codegen clang:frontend Language frontend issues, e.g. anything involving "Sema" clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

5 participants