Skip to content

Conversation

@MacDue
Copy link
Member

@MacDue MacDue commented Oct 28, 2025

This patch reimplements the SME ABI __arm_za_disable routine within libunwind. This routine must be called before resuming from unwinding on AArch64 platforms with SME support.

Before calling the routine, we need to check that SME is available. In this patch, this is implemented for Linux-based platforms by checking HWCAP2. It should be possible to implement this check for other platforms as required.

This patch includes a test for this functionality. This test requires SME, so on platforms without it, it will simply pass.

This patch reimplements the SME ABI `__arm_za_disable` routine within
libunwind. This routine must be called before resuming from unwinding on
AArch64 platforms with SME support.

Before calling the routine, we need to check that SME is available. In
this patch, this is implemented for Linux-based platforms by checking
HWCAP2. It should be possible to implement this check for other
platforms as required.

This patch includes a test for this functionality. This test requires
SME, so on platforms without it, it will simply pass.
@MacDue MacDue changed the title [libunwind] Disable ZA before resuming from unwinding (Linux) [libunwind] Disable ZA before resuming from unwinding (on Linux) Oct 29, 2025
@MacDue MacDue changed the title [libunwind] Disable ZA before resuming from unwinding (on Linux) [libunwind][AArch64] Disable ZA before resuming from unwinding (on Linux) Oct 29, 2025
@MacDue MacDue requested a review from sdesmalen-arm October 29, 2025 15:51
@MacDue MacDue marked this pull request as ready for review October 30, 2025 19:32
@MacDue MacDue requested a review from a team as a code owner October 30, 2025 19:32
@llvmbot
Copy link
Member

llvmbot commented Oct 30, 2025

@llvm/pr-subscribers-libunwind

Author: Benjamin Maxwell (MacDue)

Changes

This patch reimplements the SME ABI __arm_za_disable routine within libunwind. This routine must be called before resuming from unwinding on AArch64 platforms with SME support.

Before calling the routine, we need to check that SME is available. In this patch, this is implemented for Linux-based platforms by checking HWCAP2. It should be possible to implement this check for other platforms as required.

This patch includes a test for this functionality. This test requires SME, so on platforms without it, it will simply pass.


Full diff: https://github.com/llvm/llvm-project/pull/165451.diff

4 Files Affected:

  • (modified) libunwind/src/Registers.hpp (+38-15)
  • (modified) libunwind/src/UnwindRegistersSave.S (+48)
  • (modified) libunwind/src/libunwind.cpp (-1)
  • (added) libunwind/test/aarch64_za_unwind.pass.cpp (+117)
diff --git a/libunwind/src/Registers.hpp b/libunwind/src/Registers.hpp
index 5a5b57835379a..e4530d58f9a04 100644
--- a/libunwind/src/Registers.hpp
+++ b/libunwind/src/Registers.hpp
@@ -20,6 +20,11 @@
 #include "libunwind_ext.h"
 #include "shadow_stack_unwind.h"
 
+#if __has_include(<sys/auxv.h>)
+#include <sys/auxv.h>
+#define HAVE_SYS_AUXV_H
+#endif
+
 namespace libunwind {
 
 // For emulating 128-bit registers
@@ -1828,6 +1833,7 @@ inline const char *Registers_ppc64::getRegisterName(int regNum) {
 /// process.
 class _LIBUNWIND_HIDDEN Registers_arm64;
 extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *);
+extern "C" bool __libunwind_Registers_arm64_za_disable();
 
 #if defined(_LIBUNWIND_USE_GCS)
 extern "C" void *__libunwind_shstk_get_jump_target() {
@@ -1837,7 +1843,7 @@ extern "C" void *__libunwind_shstk_get_jump_target() {
 
 class _LIBUNWIND_HIDDEN Registers_arm64 {
 public:
-  Registers_arm64();
+  Registers_arm64() = default;
   Registers_arm64(const void *registers);
   Registers_arm64(const Registers_arm64 &);
   Registers_arm64 &operator=(const Registers_arm64 &);
@@ -1855,7 +1861,10 @@ class _LIBUNWIND_HIDDEN Registers_arm64 {
   v128        getVectorRegister(int num) const;
   void        setVectorRegister(int num, v128 value);
   static const char *getRegisterName(int num);
-  void        jumpto() { __libunwind_Registers_arm64_jumpto(this); }
+  // Disables ZA (only when SME was detected, see zaDisable()) and then hands
+  // this register set to __libunwind_Registers_arm64_jumpto. The order
+  // matters: ZA has to be switched off before the jump is performed.
+  void jumpto() {
+    zaDisable();
+    __libunwind_Registers_arm64_jumpto(this);
+  }
   static constexpr int lastDwarfRegNum() {
     return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64;
   }
@@ -1908,25 +1917,43 @@ class _LIBUNWIND_HIDDEN Registers_arm64 {
 private:
   uint64_t lazyGetVG() const;
 
+  // Switches ZA off through the assembly helper, but only when SME was
+  // detected for this register set. A `false` result from the helper is
+  // treated as fatal and aborts the process.
+  void zaDisable() const {
+    const bool smePresent = _misc_registers.__has_sme;
+    if (smePresent && !__libunwind_Registers_arm64_za_disable())
+      _LIBUNWIND_ABORT("SME ZA disable failed");
+  }
+
+  // Reports whether the running system advertises SME.
+  //
+  // Only implemented for Linux on AArch64, where bit 23 of AT_HWCAP2 is the
+  // SME capability bit. The mere presence of <sys/auxv.h> is not sufficient:
+  // other platforms may ship that header without getauxval()/AT_HWCAP2, and
+  // on other architectures bit 23 of HWCAP2 has a different meaning. Every
+  // other configuration conservatively reports "no SME".
+  static bool checkHasSME() {
+#if defined(HAVE_SYS_AUXV_H) && defined(__linux__) &&                          \
+    defined(__aarch64__) && defined(AT_HWCAP2)
+    // Unsigned mask so the test against the unsigned long HWCAP2 value does
+    // not mix signedness.
+    constexpr unsigned long hwcap2_sme = 1UL << 23;
+    return (getauxval(AT_HWCAP2) & hwcap2_sme) != 0;
+#else
+    // TODO: Support other platforms.
+    return false;
+#endif
+  }
+
   struct GPRs {
-    uint64_t __x[29]; // x0-x28
-    uint64_t __fp;    // Frame pointer x29
-    uint64_t __lr;    // Link register x30
-    uint64_t __sp;    // Stack pointer x31
-    uint64_t __pc;    // Program counter
-    uint64_t __ra_sign_state; // RA sign state register
+    uint64_t __x[29] = {};        // x0-x28
+    uint64_t __fp = 0;            // Frame pointer x29
+    uint64_t __lr = 0;            // Link register x30
+    uint64_t __sp = 0;            // Stack pointer x31
+    uint64_t __pc = 0;            // Program counter
+    uint64_t __ra_sign_state = 0; // RA sign state register
   };
 
   struct Misc {
-    mutable uint64_t __vg = 0; // Vector Granule
+    mutable uint32_t __vg = 0; // Vector Granule
+    bool __has_sme = checkHasSME();
   };
 
-  GPRs _registers;
+  GPRs _registers = {};
   // Currently only the lower double in 128-bit vectore registers
   // is perserved during unwinding.  We could define new register
   // numbers (> 96) which mean whole vector registers, then this
   // struct would need to change to contain whole vector registers.
-  double _vectorHalfRegisters[32];
+  double _vectorHalfRegisters[32] = {};
 
   // Miscellaneous/virtual registers. These are stored below the GPRs and FPRs
   // as they do not correspond to physical registers, so do not need to be
@@ -1971,10 +1998,6 @@ Registers_arm64::operator=(const Registers_arm64 &other) {
   return *this;
 }
 
-inline Registers_arm64::Registers_arm64() {
-  memset(static_cast<void *>(this), 0, sizeof(*this));
-}
-
 inline bool Registers_arm64::validRegister(int regNum) const {
   if (regNum == UNW_REG_IP)
     return true;
diff --git a/libunwind/src/UnwindRegistersSave.S b/libunwind/src/UnwindRegistersSave.S
index 619a59751151e..6bc6d4eba84ea 100644
--- a/libunwind/src/UnwindRegistersSave.S
+++ b/libunwind/src/UnwindRegistersSave.S
@@ -827,6 +827,54 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
   ret
 #endif
 
+//
+// extern "C" bool __libunwind_Registers_arm64_za_disable()
+//
+// Commits a pending save of ZA (if TPIDR2_EL0 points at a usable TPIDR2
+// block), then clears TPIDR2_EL0 and turns PSTATE.ZA off.
+//
+// Register usage inside the routine:
+//  x16  TPIDR2 block pointer, later the running za_save_buffer pointer
+//  x14  num_za_save_slices (16-bit value loaded from block offset 8)
+//  x15  index of the ZA slice currently being stored
+//
+// On return:
+//  success (true/false) is returned in x0
+//
+  .p2align 2
+DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_za_disable)
+  // If TPIDR2_EL0 is null, the subroutine just disables ZA.
+  .inst 0xd53bd0b0 // mrs x16, TPIDR2_EL0
+  cbz x16, 1f
+
+  // If any of the reserved bytes in the first 16 bytes of the TPIDR2 block are
+  // nonzero, return false (libunwind will then abort).
+  // The reserved bytes are checked as a halfword at offset 10 and a word at
+  // offset 12.
+  ldrh w14, [x16, #10]
+  cbnz w14, 2f
+  ldr w14, [x16, #12]
+  cbnz w14, 2f
+
+  // If num_za_save_slices is zero, the subroutine just disables ZA.
+  ldrh w14, [x16, #8]
+  cbz x14, 1f
+
+  // If za_save_buffer is NULL, the subroutine just disables ZA.
+  ldr x16, [x16]
+  cbz x16, 1f
+
+  // Store ZA to za_save_buffer.
+  // The loop body runs at least once; x14 is known to be nonzero here. Each
+  // iteration stores one slice and advances the buffer pointer with addsvl.
+  mov x15, xzr
+0:
+  .inst 0xe1206200 // str za[w15,0], [x16]
+  .inst 0x04305830 // addsvl x16, x16, #1
+  add x15, x15, #1
+  cmp x14, x15
+  b.ne 0b
+1:
+  // * Set TPIDR2_EL0 to null.
+  .inst 0xd51bd0bf // msr TPIDR2_EL0, xzr
+  // * Set PSTATE.ZA to 0.
+  .inst 0xd503447f // smstop za
+  // * Return true (success)
+  mov x0, #1
+  ret
+2:
+  // * Return false (failure/invalid TPIDR2 block)
+  // Note: on this path TPIDR2_EL0 and PSTATE.ZA are left untouched.
+  mov x0, #0
+  ret
+
 #elif defined(__arm__) && !defined(__APPLE__)
 
 #if !defined(__ARM_ARCH_ISA_ARM)
diff --git a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp
index 951d87db868bc..c2384becbd310 100644
--- a/libunwind/src/libunwind.cpp
+++ b/libunwind/src/libunwind.cpp
@@ -514,4 +514,3 @@ bool logDWARF() {
 }
 
 #endif // NDEBUG
-
diff --git a/libunwind/test/aarch64_za_unwind.pass.cpp b/libunwind/test/aarch64_za_unwind.pass.cpp
new file mode 100644
index 0000000000000..2985bb8d298de
--- /dev/null
+++ b/libunwind/test/aarch64_za_unwind.pass.cpp
@@ -0,0 +1,117 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: linux && target={{aarch64-.+}}
+
+#include <libunwind.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/auxv.h>
+
+// Basic test of unwinding with SME lazy saves. This tests that libunwind
+// disables ZA (and commits a lazy save of ZA) before resuming from unwinding.
+
+// Note: This test requires SME (and is set up to pass on targets without SME).
+
+// Returns true when bit 23 of the AT_HWCAP2 auxiliary-vector entry is set,
+// which this test uses as its "SME is available" signal.
+static bool checkHasSME() {
+  const unsigned long auxHwcap2 = getauxval(AT_HWCAP2);
+  constexpr int smeBit = (1 << 23);
+  const bool smeAdvertised = (auxHwcap2 & smeBit) != 0;
+  return smeAdvertised;
+}
+
+// Test-local stand-in for the first 16 bytes of a TPIDR2 block.
+// NOTE(review): the ZA-disable routine reads a 16-bit slice count at offset 8
+// and requires the bytes at offsets 10-15 to be zero. Representing all of that
+// as one uint64_t only matches on a little-endian target with a slice count
+// below 65536 - confirm this is acceptable for every target the test runs on.
+struct TPIDR2Block {
+  void *za_save_buffer;     // Destination buffer for the saved ZA contents.
+  uint64_t num_save_slices; // Slice count; upper bytes double as reserved.
+};
+
+// Called while the caller's lazy save of ZA is still pending. Captures the
+// current context, steps the cursor once, and resumes through libunwind.
+__attribute__((noinline)) void private_za() {
+  // Note: Lazy save active on entry to function.
+  unw_cursor_t cur;
+  unw_context_t ctx;
+
+  unw_getcontext(&ctx);
+  unw_init_local(&cur, &ctx);
+  unw_step(&cur);
+  unw_resume(&cur);
+}
+
+// Returns true when the ZA bit (bit 1) of SVCR is currently set.
+bool isZAOn() {
+  register uint64_t svcr asm("x20");
+  // MRS x20, SVCR. The asm must be volatile: it has an output and no inputs,
+  // so without `volatile` the compiler may treat two reads as identical and
+  // merge or hoist them. The test needs a fresh read before and after
+  // unwinding.
+  asm volatile(".inst 0xd53b4254" : "=r"(svcr));
+  return (svcr & 0b10) != 0;
+}
+
+// Turns ZA on, installs a lazy-save TPIDR2 block that points at a stack
+// buffer, calls private_za() (which resumes through libunwind), and then
+// checks that:
+//   * ZA has been switched off,
+//   * TPIDR2_EL0 has been cleared, and
+//   * the (all-zero) ZA contents were written to the save buffer.
+// Any failed check prints a message and aborts.
+__attribute__((noinline)) void za_function_with_lazy_save() {
+  // The hand-encoded instructions below hard-code x8 as their operand.
+  register uint64_t tmp asm("x8");
+
+  // SMSTART ZA (should zero ZA)
+  asm volatile(".inst 0xd503457f");
+
+  // RDSVL x8, #1 (read streaming vector length)
+  asm(".inst 0x04bf5828" : "=r"(tmp));
+
+  // Allocate and fill ZA save buffer with 0xAA.
+  // The buffer is SVL * SVL bytes; 0xAA makes a real save of zeroed ZA
+  // distinguishable from an untouched buffer.
+  size_t buffer_size = tmp * tmp;
+  uint8_t *za_save_buffer = (uint8_t *)alloca(buffer_size);
+  memset(za_save_buffer, 0xAA, buffer_size);
+
+  TPIDR2Block block = {za_save_buffer, tmp};
+  tmp = reinterpret_cast<uint64_t>(&block);
+
+  // MSR TPIDR2_EL0, x8 (setup lazy save of ZA)
+  // The "memory" clobber forces the stores to `block` and the buffer to be
+  // emitted before the register starts pointing at them.
+  asm volatile(".inst 0xd51bd0a8" ::"r"(tmp) : "memory");
+
+  // ZA should be on before unwinding.
+  if (!isZAOn()) {
+    fprintf(stderr, __FILE__ ": fail (ZA not on before call)\n");
+    abort();
+  } else {
+    fprintf(stderr, __FILE__ ": pass (ZA on before call)\n");
+  }
+
+  private_za();
+
+  // ZA should be off after unwinding.
+  if (isZAOn()) {
+    fprintf(stderr, __FILE__ ": fail (ZA on after unwinding)\n");
+    abort();
+  } else {
+    fprintf(stderr, __FILE__ ": pass (ZA off after unwinding)\n");
+  }
+
+  // MRS x8, TPIDR2_EL0 (read TPIDR2_EL0)
+  // volatile + "memory" keep this read (and the buffer loads below) from
+  // being moved ahead of the call to private_za().
+  asm volatile(".inst 0xd53bd0a8" : "=r"(tmp)::"memory");
+  // ZA should have been saved (TPIDR2_EL0 zero).
+  if (tmp != 0) {
+    fprintf(stderr, __FILE__ ": fail (TPIDR2_EL0 non-null after unwinding)\n");
+    abort();
+  } else {
+    fprintf(stderr, __FILE__ ": pass (TPIDR2_EL0 null after unwinding)\n");
+  }
+
+  // ZA (all zero) should have been saved to the buffer.
+  for (size_t i = 0; i < buffer_size; ++i) {
+    if (za_save_buffer[i] != 0) {
+      fprintf(stderr,
+              __FILE__ ": fail (za_save_buffer non-zero after unwinding)\n");
+      abort();
+    }
+  }
+  fprintf(stderr, __FILE__ ": pass (za_save_buffer zero'd after unwinding)\n");
+}
+
+// Entry point: runs the lazy-save unwinding check when SME is reported as
+// available; otherwise prints a "pass" line and exits successfully.
+int main(int, char **) {
+  if (checkHasSME()) {
+    za_function_with_lazy_save();
+    return 0;
+  }
+  // Pass (SME is required for this test to run).
+  fprintf(stderr, __FILE__ ": pass (no SME support)\n");
+  return 0;
+}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants