[mlir][ArmSME] Add tests for Streaming SVE

This patch adds a couple of tests for targeting Arm Streaming SVE (SSVE) mode, part of the Arm Scalable Matrix Extension (SME).

SSVE is enabled in the backend at the function boundary by specifying the `aarch64_pstate_sm_enabled` attribute, as documented here [1]. SSVE can be targeted from MLIR by specifying this in the passthrough attributes [2] and compiling with:

    -mattr=+sme,+sve -force-streaming-compatible-sve

The passthrough will propagate to the backend where `smstart/smstop` will be emitted around the call to the SSVE function.

The set of legal instructions changes in SSVE; `-force-streaming-compatible-sve` avoids the use of NEON entirely and instead lowers to (streaming-compatible) SVE. The behaviour this flag predicates will be hooked up to the function attribute in the future such that simply specifying this (should) lead to correct code-generation.

Two tests are added:

* A basic LLVMIR test verifying the attribute is passed through.
* An integration test calling an SSVE function.

The integration test can be run with QEMU.

[1] https://llvm.org/docs/AArch64SME.html
[2] https://mlir.llvm.org/docs/Dialects/LLVM/#attribute-pass-through

Reviewed By: awarzynski, aartbik

Differential Revision: https://reviews.llvm.org/D148111
llvm · Apr 25, 2023 · c8d1388 · c8d1388
1 parent c2f29f2
commit c8d1388
Show file tree

Hide file tree

Showing 7 changed files with 135 additions and 0 deletions.
diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt
@@ -29,6 +29,7 @@ if (MLIR_INCLUDE_INTEGRATION_TESTS)
   option(MLIR_RUN_CUDA_TENSOR_CORE_TESTS "Run CUDA Tensor core WMMA tests.")
   option(MLIR_RUN_CUDA_SM80_TESTS "Run CUDA A100 tests.")
   option(MLIR_RUN_ARM_SVE_TESTS "Run Arm SVE tests.")
+  option(MLIR_RUN_ARM_SME_TESTS "Run Arm SME tests.")
 
 
   # The native target may not be enabled when cross compiling, raise an error.
@@ -52,6 +53,7 @@ llvm_canonicalize_cmake_booleans(
   MLIR_RUN_CUDA_TENSOR_CORE_TESTS
   MLIR_RUN_X86VECTOR_TESTS
   MLIR_RUN_ARM_SVE_TESTS
+  MLIR_RUN_ARM_SME_TESTS
   MLIR_RUN_CUDA_SM80_TESTS
   )
 

diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg b/mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg
@@ -1,4 +1,5 @@
 import sys
+from lit.llvm import llvm_config
 
 # FIXME: %mlir_native_utils_lib_dir is set incorrectly on Windows
 if sys.platform == 'win32':
@@ -18,6 +19,15 @@ if config.mlir_run_arm_sve_tests:
         config.substitutions.append(('%mlir_native_utils_lib_dir', config.mlir_lib_dir))
 
     if config.arm_emulator_executable:
+        if not config.arm_emulator_lli_executable:
+            # Top-level lit config adds llvm_tools_dir to PATH but this is lost
+            # when running under an emulator. If the user didn't specify an lli
+            # executable, use absolute path %llvm_tools_dir/lli.
+            # TODO(c-rhodes): This logic is duplicated across several Lit files
+            # and needs refactoring.
+            lli_cmd = llvm_config.use_llvm_tool('lli', search_env='LLI', required=True,
+                                                search_paths=[config.llvm_tools_dir],
+                                                use_installed=False)
         # Run test in emulator (qemu or armie).
         emulation_cmd = config.arm_emulator_executable
         if config.arm_emulator_options:

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/lit.local.cfg b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/lit.local.cfg
@@ -0,0 +1,36 @@
+import sys
+from lit.llvm import llvm_config
+
+# ArmSME tests must be enabled via build flag (surfaced here as config.mlir_run_arm_sme_tests).
+if not config.mlir_run_arm_sme_tests:
+    config.unsupported = True
+
+# No JIT on win32.
+if sys.platform == 'win32':
+    config.unsupported = True
+
+lli_cmd = 'lli'  # Default: bare tool name; replaced below when an explicit lli is configured or an emulator is used.
+if config.arm_emulator_lli_executable:
+    lli_cmd = config.arm_emulator_lli_executable
+
+config.substitutions.append(('%mlir_native_utils_lib_dir',
+    config.arm_emulator_utils_lib_dir or config.mlir_lib_dir))  # Emulator utils dir wins when set, else the MLIR lib dir.
+
+if config.arm_emulator_executable:
+    if not config.arm_emulator_lli_executable:
+        # Top-level lit config adds llvm_tools_dir to PATH but this is lost
+        # when running under an emulator. If the user didn't specify an lli
+        # executable, use absolute path %llvm_tools_dir/lli.
+        # TODO(c-rhodes): This logic is duplicated across several Lit files and
+        # needs refactoring.
+        lli_cmd = llvm_config.use_llvm_tool('lli', search_env='LLI', required=True,
+                                            search_paths=[config.llvm_tools_dir],
+                                            use_installed=False)
+    # Run test in emulator (QEMU): %lli becomes "<emulator> [<emulator options>] <lli>".
+    emulation_cmd = config.arm_emulator_executable
+    if config.arm_emulator_options:
+        emulation_cmd = emulation_cmd + ' ' + config.arm_emulator_options
+    emulation_cmd = emulation_cmd + ' ' + lli_cmd
+    config.substitutions.append(('%lli', emulation_cmd))
+else:
+    config.substitutions.append(('%lli', lli_cmd))  # No emulator configured: %lli runs lli directly.
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-ssve.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-ssve.mlir
@@ -0,0 +1,65 @@
+// RUN: mlir-opt %s -test-lower-to-llvm | \
+// RUN: mlir-translate -mlir-to-llvmir | \
+// RUN: %lli --march=aarch64 --mattr="+sve,+sme" \
+// RUN:      -force-streaming-compatible-sve \
+// RUN:      --entry-function=entry \
+// RUN:      --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+// NOTE: To run this test, your CPU must support SME, or the test must be run under an emulator such as QEMU.
+
+// VLA memcopy in streaming mode: copies %size i64 elements from %src to %dst, one vector<[2]xi64> per iteration. There is no tail handling, so %size is expected to be a multiple of the step.
+func.func @streaming_kernel_copy(%src : memref<?xi64>, %dst : memref<?xi64>, %size : index) attributes {passthrough = ["aarch64_pstate_sm_enabled"]} {
+  %c0 = arith.constant 0 : index
+  %c2 = arith.constant 2 : index
+  %vscale = vector.vscale
+  %step = arith.muli %c2, %vscale : index // Loop stride: 2 * vscale, matching the vector<[2]xi64> accesses below.
+  scf.for %i = %c0 to %size step %step {
+    %0 = vector.load %src[%i] : memref<?xi64>, vector<[2]xi64>
+    vector.store %0, %dst[%i] : memref<?xi64>, vector<[2]xi64>
+  }
+  return
+}
+
+func.func @entry() -> i32 {
+  %i0 = arith.constant 0: i64
+  %r0 = arith.constant 0: i32
+  %c0 = arith.constant 0: index
+  %c4 = arith.constant 4: index
+  %c32 = arith.constant 32: index
+
+  // Set up memory: %a is filled with 1..32, %a_copy is the destination of the copy.
+  %a = memref.alloc()      : memref<32xi64>
+  %a_copy = memref.alloc() : memref<32xi64>
+  %a_data = arith.constant dense<[1 , 2,  3 , 4 , 5,  6,  7,  8,
+                                  9, 10, 11, 12, 13, 14, 15, 16,
+                                  17, 18, 19, 20, 21, 22, 23, 24,
+                                  25, 26, 27, 28, 29, 30, 31, 32]> : vector<32xi64>
+  vector.transfer_write %a_data, %a[%c0] : vector<32xi64>, memref<32xi64>
+
+  // Call kernel on dynamically-sized casts of both buffers, copying all 32 elements.
+  %0 = memref.cast %a : memref<32xi64> to memref<?xi64>
+  %1 = memref.cast %a_copy : memref<32xi64> to memref<?xi64>
+  call @streaming_kernel_copy(%0, %1, %c32) : (memref<?xi64>, memref<?xi64>, index) -> ()
+
+  // Print and verify the copied buffer, four elements per printed vector.
+  //
+  // CHECK:      ( 1, 2, 3, 4 )
+  // CHECK-NEXT: ( 5, 6, 7, 8 )
+  // CHECK-NEXT: ( 9, 10, 11, 12 )
+  // CHECK-NEXT: ( 13, 14, 15, 16 )
+  // CHECK-NEXT: ( 17, 18, 19, 20 )
+  // CHECK-NEXT: ( 21, 22, 23, 24 )
+  // CHECK-NEXT: ( 25, 26, 27, 28 )
+  // CHECK-NEXT: ( 29, 30, 31, 32 )
+  scf.for %i = %c0 to %c32 step %c4 {
+    %cv = vector.transfer_read %a_copy[%i], %i0 : memref<32xi64>, vector<4xi64>
+    vector.print %cv : vector<4xi64>
+  }
+
+  // Release resources.
+  memref.dealloc %a      : memref<32xi64>
+  memref.dealloc %a_copy : memref<32xi64>
+
+  return %r0 : i32
+}
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/lit.local.cfg b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/lit.local.cfg
@@ -1,4 +1,5 @@
 import sys
+from lit.llvm import llvm_config
 
 # ArmSVE tests must be enabled via build flag.
 if not config.mlir_run_arm_sve_tests:
@@ -16,6 +17,15 @@ config.substitutions.append(('%mlir_native_utils_lib_dir',
     config.arm_emulator_utils_lib_dir or config.mlir_lib_dir))
 
 if config.arm_emulator_executable:
+    if not config.arm_emulator_lli_executable:
+        # Top-level lit config adds llvm_tools_dir to PATH but this is lost
+        # when running under an emulator. If the user didn't specify an lli
+        # executable, use absolute path %llvm_tools_dir/lli.
+        # TODO(c-rhodes): This logic is duplicated across several Lit files and
+        # needs refactoring.
+        lli_cmd = llvm_config.use_llvm_tool('lli', search_env='LLI', required=True,
+                                            search_paths=[config.llvm_tools_dir],
+                                            use_installed=False)
     # Run test in emulator (qemu or armie)
     emulation_cmd = config.arm_emulator_executable
     if config.arm_emulator_options:

diff --git a/mlir/test/Target/LLVMIR/arm-ssve.mlir b/mlir/test/Target/LLVMIR/arm-ssve.mlir
@@ -0,0 +1,11 @@
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+// Attribute to enable streaming-mode: the "aarch64_pstate_sm_enabled" passthrough entry must show up in the attribute group referenced by the translated function.
+
+// CHECK-LABEL: @streaming_callee
+// CHECK: #[[ATTR:[0-9]*]]
+llvm.func @streaming_callee() attributes {passthrough = ["aarch64_pstate_sm_enabled"]} {
+  llvm.return
+}
+
+// CHECK: attributes #[[ATTR]] = { "aarch64_pstate_sm_enabled" }
diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in
@@ -36,6 +36,7 @@ config.enable_bindings_python = @MLIR_ENABLE_BINDINGS_PYTHON@
 config.intel_sde_executable = "@INTEL_SDE_EXECUTABLE@"
 config.mlir_run_amx_tests = @MLIR_RUN_AMX_TESTS@
 config.mlir_run_arm_sve_tests = @MLIR_RUN_ARM_SVE_TESTS@
+config.mlir_run_arm_sme_tests = @MLIR_RUN_ARM_SME_TESTS@
 config.mlir_run_x86vector_tests = @MLIR_RUN_X86VECTOR_TESTS@
 config.mlir_run_riscv_vector_tests = "@MLIR_RUN_RISCV_VECTOR_TESTS@"
 config.mlir_run_cuda_tensor_core_tests = @MLIR_RUN_CUDA_TENSOR_CORE_TESTS@