Skip to content

Commit

Permalink
[mlir][ArmSME] Add tests for Streaming SVE
Browse files Browse the repository at this point in the history
This patch adds a couple of tests for targeting Arm Streaming SVE (SSVE)
mode, part of the Arm Scalable Matrix Extension (SME).

SSVE is enabled in the backend at the function boundary by specifying
the `aarch64_pstate_sm_enabled` attribute, as documented here [1]. SSVE
can be targeted from MLIR by specifying this in the passthrough
attributes [2] and compiling with

  -mattr=+sme,+sve -force-streaming-compatible-sve

The passthrough will propagate to the backend where `smstart/smstop`
will be emitted around the call to the SSVE function.

The set of legal instructions changes in SSVE;
`-force-streaming-compatible-sve` avoids the use of NEON entirely and
instead lowers to (streaming-compatible) SVE. The behaviour controlled by
this flag will be hooked up to the function attribute in the future, such
that simply specifying the attribute should lead to correct
code generation.

Two tests are added:

  * A basic LLVMIR test verifying the attribute is passed through.
  * An integration test calling an SSVE function.

The integration test can be run with QEMU.

[1] https://llvm.org/docs/AArch64SME.html
[2] https://mlir.llvm.org/docs/Dialects/LLVM/#attribute-pass-through

Reviewed By: awarzynski, aartbik

Differential Revision: https://reviews.llvm.org/D148111
  • Loading branch information
c-rhodes committed Apr 25, 2023
1 parent c2f29f2 commit c8d1388
Show file tree
Hide file tree
Showing 7 changed files with 135 additions and 0 deletions.
2 changes: 2 additions & 0 deletions mlir/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ if (MLIR_INCLUDE_INTEGRATION_TESTS)
option(MLIR_RUN_CUDA_TENSOR_CORE_TESTS "Run CUDA Tensor core WMMA tests.")
option(MLIR_RUN_CUDA_SM80_TESTS "Run CUDA A100 tests.")
option(MLIR_RUN_ARM_SVE_TESTS "Run Arm SVE tests.")
option(MLIR_RUN_ARM_SME_TESTS "Run Arm SME tests.")


# The native target may not be enabled when cross compiling, raise an error.
Expand All @@ -52,6 +53,7 @@ llvm_canonicalize_cmake_booleans(
MLIR_RUN_CUDA_TENSOR_CORE_TESTS
MLIR_RUN_X86VECTOR_TESTS
MLIR_RUN_ARM_SVE_TESTS
MLIR_RUN_ARM_SME_TESTS
MLIR_RUN_CUDA_SM80_TESTS
)

Expand Down
10 changes: 10 additions & 0 deletions mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import sys
from lit.llvm import llvm_config

# FIXME: %mlir_native_utils_lib_dir is set incorrectly on Windows
if sys.platform == 'win32':
Expand All @@ -18,6 +19,15 @@ if config.mlir_run_arm_sve_tests:
config.substitutions.append(('%mlir_native_utils_lib_dir', config.mlir_lib_dir))

if config.arm_emulator_executable:
if not config.arm_emulator_lli_executable:
# Top-level lit config adds llvm_tools_dir to PATH but this is lost
# when running under an emulator. If the user didn't specify an lli
# executable, use absolute path %llvm_tools_dir/lli.
# TODO(c-rhodes): This logic is duplicated across several Lit files
# and needs refactoring.
lli_cmd = llvm_config.use_llvm_tool('lli', search_env='LLI', required=True,
search_paths=[config.llvm_tools_dir],
use_installed=False)
# Run test in emulator (qemu or armie).
emulation_cmd = config.arm_emulator_executable
if config.arm_emulator_options:
Expand Down
36 changes: 36 additions & 0 deletions mlir/test/Integration/Dialect/Vector/CPU/ArmSME/lit.local.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import sys
from lit.llvm import llvm_config

# Local lit configuration for the ArmSME integration tests.
#
# The tests are marked unsupported unless they were enabled via the build flag,
# and always on win32, where there is no JIT.
if not config.mlir_run_arm_sme_tests or sys.platform == 'win32':
    config.unsupported = True

# Use the user-supplied lli for the emulated target if there is one; otherwise
# fall back to whatever `lli` resolves to on PATH.
lli_cmd = config.arm_emulator_lli_executable or 'lli'

# Directory the runner utility libraries are loaded from; the emulator-specific
# directory takes precedence when it is set.
native_utils_lib_dir = config.arm_emulator_utils_lib_dir or config.mlir_lib_dir
config.substitutions.append(('%mlir_native_utils_lib_dir', native_utils_lib_dir))

if not config.arm_emulator_executable:
    # Native run: %lli expands to lli itself.
    lli_substitution = lli_cmd
else:
    if not config.arm_emulator_lli_executable:
        # Top-level lit config adds llvm_tools_dir to PATH but this is lost
        # when running under an emulator. If the user didn't specify an lli
        # executable, use absolute path %llvm_tools_dir/lli.
        # TODO(c-rhodes): This logic is duplicated across several Lit files and
        # needs refactoring.
        lli_cmd = llvm_config.use_llvm_tool(
            'lli',
            search_env='LLI',
            required=True,
            search_paths=[config.llvm_tools_dir],
            use_installed=False,
        )
    # Run test in emulator (QEMU): "<emulator> [<options>] <lli>".
    command_parts = [config.arm_emulator_executable]
    if config.arm_emulator_options:
        command_parts.append(config.arm_emulator_options)
    command_parts.append(lli_cmd)
    lli_substitution = ' '.join(command_parts)

config.substitutions.append(('%lli', lli_substitution))
65 changes: 65 additions & 0 deletions mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-ssve.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// RUN: mlir-opt %s -test-lower-to-llvm | \
// RUN: mlir-translate -mlir-to-llvmir | \
// RUN: %lli --march=aarch64 --mattr="+sve,+sme" \
// RUN: -force-streaming-compatible-sve \
// RUN: --entry-function=entry \
// RUN: --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s

// NOTE: To run this test, your CPU must support SME.

// VLA memcopy in streaming mode.
// Copies i64 elements from `%src` to `%dst`, one scalable vector<[2]xi64> per
// loop iteration, for indices in [0, %size). The `aarch64_pstate_sm_enabled`
// passthrough attribute is what requests streaming mode for this function.
func.func @streaming_kernel_copy(%src : memref<?xi64>, %dst : memref<?xi64>, %size : index) attributes {passthrough = ["aarch64_pstate_sm_enabled"]} {
%c0 = arith.constant 0 : index
%c2 = arith.constant 2 : index
// A vector<[2]xi64> holds 2 * vscale elements, so the loop advances by that
// many elements each iteration.
%vscale = vector.vscale
%step = arith.muli %c2, %vscale : index
// NOTE(review): the load/store below are unmasked, so this presumably relies
// on `%size` being a multiple of `%step` - confirm against callers.
scf.for %i = %c0 to %size step %step {
%0 = vector.load %src[%i] : memref<?xi64>, vector<[2]xi64>
vector.store %0, %dst[%i] : memref<?xi64>, vector<[2]xi64>
}
return
}

// Test driver: fills a 32-element i64 buffer with 1..32, copies it into a
// second buffer via @streaming_kernel_copy, prints the copy four elements per
// line, frees both buffers and returns 0.
func.func @entry() -> i32 {
// %i0 is the padding value passed to the transfer_read in the print loop;
// %r0 is the value returned from this function.
%i0 = arith.constant 0: i64
%r0 = arith.constant 0: i32
%c0 = arith.constant 0: index
%c4 = arith.constant 4: index
%c32 = arith.constant 32: index

// Set up memory.
// Only %a is initialised here; %a_copy is written by the kernel call below.
%a = memref.alloc() : memref<32xi64>
%a_copy = memref.alloc() : memref<32xi64>
%a_data = arith.constant dense<[1 , 2, 3 , 4 , 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32]> : vector<32xi64>
vector.transfer_write %a_data, %a[%c0] : vector<32xi64>, memref<32xi64>

// Call kernel.
// The kernel takes dynamically sized memrefs, so the static shapes are cast
// away first.
%0 = memref.cast %a : memref<32xi64> to memref<?xi64>
%1 = memref.cast %a_copy : memref<32xi64> to memref<?xi64>
call @streaming_kernel_copy(%0, %1, %c32) : (memref<?xi64>, memref<?xi64>, index) -> ()

// Print and verify.
//
// CHECK: ( 1, 2, 3, 4 )
// CHECK-NEXT: ( 5, 6, 7, 8 )
// CHECK-NEXT: ( 9, 10, 11, 12 )
// CHECK-NEXT: ( 13, 14, 15, 16 )
// CHECK-NEXT: ( 17, 18, 19, 20 )
// CHECK-NEXT: ( 21, 22, 23, 24 )
// CHECK-NEXT: ( 25, 26, 27, 28 )
// CHECK-NEXT: ( 29, 30, 31, 32 )
// Read the destination buffer back in fixed-size groups of four elements and
// print each group on its own line.
scf.for %i = %c0 to %c32 step %c4 {
%cv = vector.transfer_read %a_copy[%i], %i0 : memref<32xi64>, vector<4xi64>
vector.print %cv : vector<4xi64>
}

// Release resources.
memref.dealloc %a : memref<32xi64>
memref.dealloc %a_copy : memref<32xi64>

return %r0 : i32
}
10 changes: 10 additions & 0 deletions mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/lit.local.cfg
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import sys
from lit.llvm import llvm_config

# ArmSVE tests must be enabled via build flag.
if not config.mlir_run_arm_sve_tests:
Expand All @@ -16,6 +17,15 @@ config.substitutions.append(('%mlir_native_utils_lib_dir',
config.arm_emulator_utils_lib_dir or config.mlir_lib_dir))

if config.arm_emulator_executable:
if not config.arm_emulator_lli_executable:
# Top-level lit config adds llvm_tools_dir to PATH but this is lost
# when running under an emulator. If the user didn't specify an lli
# executable, use absolute path %llvm_tools_dir/lli.
# TODO(c-rhodes): This logic is duplicated across several Lit files and
# needs refactoring.
lli_cmd = llvm_config.use_llvm_tool('lli', search_env='LLI', required=True,
search_paths=[config.llvm_tools_dir],
use_installed=False)
# Run test in emulator (qemu or armie)
emulation_cmd = config.arm_emulator_executable
if config.arm_emulator_options:
Expand Down
11 changes: 11 additions & 0 deletions mlir/test/Target/LLVMIR/arm-ssve.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s

// Attribute to enable streaming-mode.

// CHECK-LABEL: @streaming_callee
// CHECK: #[[ATTR:[0-9]*]]
// Function with an empty body; it exists only to carry the
// `aarch64_pstate_sm_enabled` passthrough attribute through translation to
// LLVM IR.
llvm.func @streaming_callee() attributes {passthrough = ["aarch64_pstate_sm_enabled"]} {
llvm.return
}

// CHECK: attributes #[[ATTR]] = { "aarch64_pstate_sm_enabled" }
1 change: 1 addition & 0 deletions mlir/test/lit.site.cfg.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ config.enable_bindings_python = @MLIR_ENABLE_BINDINGS_PYTHON@
config.intel_sde_executable = "@INTEL_SDE_EXECUTABLE@"
config.mlir_run_amx_tests = @MLIR_RUN_AMX_TESTS@
config.mlir_run_arm_sve_tests = @MLIR_RUN_ARM_SVE_TESTS@
config.mlir_run_arm_sme_tests = @MLIR_RUN_ARM_SME_TESTS@
config.mlir_run_x86vector_tests = @MLIR_RUN_X86VECTOR_TESTS@
config.mlir_run_riscv_vector_tests = "@MLIR_RUN_RISCV_VECTOR_TESTS@"
config.mlir_run_cuda_tensor_core_tests = @MLIR_RUN_CUDA_TENSOR_CORE_TESTS@
Expand Down

0 comments on commit c8d1388

Please sign in to comment.