Skip to content

Commit

Permalink
Extend 'hip_driver_check_page_migration' (kokkos#6364)
Browse files Browse the repository at this point in the history
* Teach 'hip_driver_check_page_migration' about hipDeviceAttributePageableMemoryAccess

This attribute returns true if either HSA_XNACK=1 or amdgpu.noretry=0 is set, meaning we no longer have to guess whether pageable HMM is enabled, e.g.:

	$ ./a.out
	Pagable? 0
	$ HSA_XNACK=1 ./a.out
	Pagable? 1
	$ cat /proc/cmdline
	<... amdgpu.noretry=0 ...>
	$ ./a.out
	Pagable? 1
	$ HSA_XNACK=1 ./a.out
	Pagable? 1

In addition, refactor hip_driver_check_page_migration to be part of HIPManagedSpace, such that we can
automatically skip the defaultdevicetype::shared_space if XNACK is not enabled.

This helps avoid false positive failures on CI runs on an MI-210/250/250X.

Change-Id: Iae6a7e22ce0471d9a6fe0813a96423eba86c5641

* apply formatting

Change-Id: I91473b1d5e9a5293bbde9fa03863aacd7a261b75

* rename to impl

Change-Id: I2b3f3a43e2f929d597d7a86da18782d1547a3533

---------

Co-authored-by: Nicholas Curtis <nicurtis@amd.com>
  • Loading branch information
Nick Curtis and Nicholas Curtis committed Aug 17, 2023
1 parent 04d5c55 commit ab10d2b
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 12 deletions.
23 changes: 14 additions & 9 deletions core/src/HIP/Kokkos_HIP_Space.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,6 @@ namespace {

static std::atomic<bool> is_first_hip_managed_allocation(true);

bool hip_driver_check_page_migration(int deviceId) {
  // Ask the driver whether the given device reports the managed-memory
  // attribute. The query itself follows the HIP documentation.
  int managed_memory_attr = 0;  // 0 == "not available" until the query fills it
  KOKKOS_IMPL_HIP_SAFE_CALL(hipDeviceGetAttribute(
      &managed_memory_attr, hipDeviceAttributeManagedMemory, deviceId));
  return managed_memory_attr != 0;
}
} // namespace

/*--------------------------------------------------------------------------*/
Expand Down Expand Up @@ -153,7 +145,7 @@ void* HIPManagedSpace::impl_allocate(
if (is_first_hip_managed_allocation.exchange(false) &&
Kokkos::show_warnings()) {
do { // hack to avoid spamming users with too many warnings
if (!hip_driver_check_page_migration(m_device)) {
if (!impl_hip_driver_check_page_migration()) {
std::cerr << R"warning(
Kokkos::HIP::allocation WARNING: The combination of device and system configuration
does not support page migration between device and host.
Expand Down Expand Up @@ -205,6 +197,19 @@ Kokkos::HIP::runtime WARNING: Kokkos did not find an environment variable 'HSA_X

return ptr;
}
bool HIPManagedSpace::impl_hip_driver_check_page_migration() const {
  // Page migration is reported as available only when the driver answers
  // "yes" to two attribute queries for m_device: managed memory and
  // pageable memory access. The first query follows the HIP documentation.
  int managed_memory_attr = 0;  // 0 == "not available" until the query fills it
  KOKKOS_IMPL_HIP_SAFE_CALL(hipDeviceGetAttribute(
      &managed_memory_attr, hipDeviceAttributeManagedMemory, m_device));
  if (managed_memory_attr == 0) {
    // Without managed memory the second attribute is not queried at all.
    return false;
  }

  // Managed memory is present; the pageable-memory-access attribute decides.
  int pageable_access_attr = 0;  // 0 == "not available" until the query fills it
  KOKKOS_IMPL_HIP_SAFE_CALL(hipDeviceGetAttribute(
      &pageable_access_attr, hipDeviceAttributePageableMemoryAccess, m_device));
  return pageable_access_attr != 0;
}

void HIPSpace::deallocate(void* const arg_alloc_ptr,
const size_t arg_alloc_size) const {
Expand Down
3 changes: 3 additions & 0 deletions core/src/HIP/Kokkos_HIP_Space.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,9 @@ class HIPManagedSpace {
const size_t arg_alloc_size,
const size_t arg_logical_size = 0) const;

// internal only method to determine whether page migration is supported
bool impl_hip_driver_check_page_migration() const;

private:
int m_device; ///< Which HIP device
template <class, class, class, class>
Expand Down
7 changes: 4 additions & 3 deletions core/unit_test/TestSharedSpace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,10 @@ TEST(defaultdevicetype, shared_space) {
Kokkos::DefaultHostExecutionSpace>)
GTEST_SKIP() << "Skipping as host and device are the same space";

#if defined(KOKKOS_ARCH_AMD_GPU) && !defined(KOKKOS_ARCH_AMD_GFX90A)
GTEST_SKIP()
<< "skipping because specified arch does not support page migration";
#if defined(KOKKOS_ARCH_AMD_GPU) && defined(KOKKOS_ENABLE_HIP)
if (!Kokkos::SharedSpace().impl_hip_driver_check_page_migration())
GTEST_SKIP()
<< "skipping because specified arch does not support page migration";
#endif
#if defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOS_ARCH_INTEL_GPU)
GTEST_SKIP()
Expand Down

0 comments on commit ab10d2b

Please sign in to comment.