Skip to content

Commit

Permalink
Add runtime function to query the number of devices and make device I…
Browse files Browse the repository at this point in the history
…D consistent with `KOKKOS_VISIBLE_DEVICES` (kokkos#6713)

* Make Kokkos::device_id consistent with KOKKOS_VISIBLE_DEVICES

* Mask visible devices in CUDA/HIP::print_config

* fixup! Make Kokkos::device_id consistent with KOKKOS_VISIBLE_DEVICES

* Add Kokkos::num_devices() -> int

* Let num_devices() return -1 when no device backend is enabled

* Update device and threads unit test

* Skip num_devices and device_id tests if KOKKOS_VISIBLE_DEVICES env var is defined

* Fix device_id test with SYCL

* Fix HIP test GetDevice[Count]

* Enable device initialization testing for OpenMPTarget

As far as I understand it was resolved in kokkos#5492

* Improve error message when the device id cannot be determined
  • Loading branch information
dalg24 committed Jan 29, 2024
1 parent 4621c86 commit d2913cb
Show file tree
Hide file tree
Showing 8 changed files with 76 additions and 43 deletions.
5 changes: 1 addition & 4 deletions core/src/Cuda/Kokkos_Cuda_Instance.cpp
Expand Up @@ -221,10 +221,7 @@ void CudaInternal::print_configuration(std::ostream &s) const {
<< CUDA_VERSION / 1000 << "." << (CUDA_VERSION % 1000) / 10 << '\n';
#endif

int count;
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaGetDeviceCount(&count));

for (int i = 0; i < count; ++i) {
for (int i : get_visible_devices()) {
cudaDeviceProp prop;
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaGetDeviceProperties(&prop, i));
s << "Kokkos::Cuda[ " << i << " ] " << prop.name << " capability "
Expand Down
6 changes: 2 additions & 4 deletions core/src/HIP/Kokkos_HIP_Instance.cpp
Expand Up @@ -27,6 +27,7 @@
#include <HIP/Kokkos_HIP.hpp>
#include <HIP/Kokkos_HIP_Space.hpp>
#include <impl/Kokkos_CheckedIntegerOps.hpp>
#include <impl/Kokkos_DeviceManagement.hpp>
#include <impl/Kokkos_Error.hpp>

/*--------------------------------------------------------------------------*/
Expand Down Expand Up @@ -89,10 +90,7 @@ void HIPInternal::print_configuration(std::ostream &s) const {
<< '\n';
#endif

int hipDevCount;
KOKKOS_IMPL_HIP_SAFE_CALL(hipGetDeviceCount(&hipDevCount));

for (int i = 0; i < hipDevCount; ++i) {
for (int i : get_visible_devices()) {
hipDeviceProp_t hipProp;
KOKKOS_IMPL_HIP_SAFE_CALL(hipGetDeviceProperties(&hipProp, i));
std::string gpu_type = hipProp.integrated == 1 ? "APU" : "dGPU";
Expand Down
1 change: 1 addition & 0 deletions core/src/Kokkos_Core.hpp
Expand Up @@ -102,6 +102,7 @@ void declare_configuration_metadata(const std::string& category,
[[nodiscard]] bool is_finalized() noexcept;

[[nodiscard]] int device_id() noexcept;
[[nodiscard]] int num_devices() noexcept;
[[nodiscard]] int num_threads() noexcept;

bool show_warnings() noexcept;
Expand Down
35 changes: 29 additions & 6 deletions core/src/impl/Kokkos_Core.cpp
Expand Up @@ -167,20 +167,43 @@ bool is_valid_map_device_id_by(std::string const& x) {

} // namespace

std::vector<int> const& Kokkos::Impl::get_visible_devices() {
static auto devices = get_visible_devices(get_device_count());
return devices;
}

[[nodiscard]] int Kokkos::device_id() noexcept {
#if defined(KOKKOS_ENABLE_CUDA)
return Cuda().cuda_device();
int device = Cuda().cuda_device();
#elif defined(KOKKOS_ENABLE_HIP)
return HIP().hip_device();
int device = HIP().hip_device();
#elif defined(KOKKOS_ENABLE_OPENACC)
return Experimental::OpenACC().acc_device_number();
int device = Experimental::OpenACC().acc_device_number();
#elif defined(KOKKOS_ENABLE_OPENMPTARGET)
return omp_get_default_device(); // FIXME_OPENMPTARGET
int device = omp_get_default_device(); // FIXME_OPENMPTARGET
#elif defined(KOKKOS_ENABLE_SYCL)
return Experimental::Impl::SYCLInternal::m_syclDev;
int device = Experimental::Impl::SYCLInternal::m_syclDev;
#else
return -1;
int device = -1;
return device;
#endif
auto const& visible_devices = Impl::get_visible_devices();
for (std::size_t i = 0; i < visible_devices.size(); ++i) {
if (visible_devices[i] == device) {
return i;
}
}
Kokkos::abort("Unexpected error: cannot determine device id");
return -1;
}

[[nodiscard]] int Kokkos::num_devices() noexcept {
if constexpr (std::is_same_v<DefaultExecutionSpace,
DefaultHostExecutionSpace>) {
return -1; // no GPU backend enabled
} else {
return Impl::get_visible_devices().size();
}
}

[[nodiscard]] int Kokkos::num_threads() noexcept {
Expand Down
4 changes: 2 additions & 2 deletions core/src/impl/Kokkos_DeviceManagement.hpp
Expand Up @@ -25,8 +25,8 @@ namespace Impl {
int get_gpu(const Kokkos::InitializationSettings& settings);
// This declaration is provided for testing purposes only
int get_ctest_gpu(int local_rank);
// ditto
std::vector<int> get_visible_devices(int device_count);
std::vector<int> get_visible_devices(int device_count); // test-only
std::vector<int> const& get_visible_devices(); // use this instead
} // namespace Impl
} // namespace Kokkos

Expand Down
10 changes: 4 additions & 6 deletions core/unit_test/CMakeLists.txt
Expand Up @@ -1235,12 +1235,10 @@ if (NOT KOKKOS_HAS_TRILINOS)
INPUT TestDeviceAndThreads.py
${USE_SOURCE_PERMISSIONS_WHEN_SUPPORTED}
)
if(NOT Kokkos_ENABLE_OPENMPTARGET) # FIXME_OPENMPTARGET does not select the right device
add_test(
NAME Kokkos_CoreUnitTest_DeviceAndThreads
COMMAND ${Python3_EXECUTABLE} -m unittest -v $<TARGET_FILE_DIR:Kokkos_CoreUnitTest_DeviceAndThreads>/TestDeviceAndThreads.py
)
endif()
add_test(
NAME Kokkos_CoreUnitTest_DeviceAndThreads
COMMAND ${Python3_EXECUTABLE} -m unittest -v $<TARGET_FILE_DIR:Kokkos_CoreUnitTest_DeviceAndThreads>/TestDeviceAndThreads.py
)
endif()
endif()

Expand Down
21 changes: 17 additions & 4 deletions core/unit_test/TestDeviceAndThreads.py
Expand Up @@ -18,6 +18,7 @@
import unittest
import subprocess
import platform
import os

PREFIX = "$<TARGET_FILE_DIR:Kokkos_CoreUnitTest_DeviceAndThreads>"
EXECUTABLE = "$<TARGET_FILE_NAME:Kokkos_CoreUnitTest_DeviceAndThreads>"
Expand Down Expand Up @@ -64,13 +65,25 @@ def test_num_threads(self):
"num_threads",
"--kokkos-num-threads={}".format(num_threads)))

def test_num_devices(self):
if "KOKKOS_VISIBLE_DEVICES" in os.environ:
self.skipTest("KOKKOS_VISIBLE_DEVICES environment variable is set")
num_devices = GetFlag("num_devices")
self.assertNotEqual(num_devices, 0)
if num_devices == -1:
self.skipTest("no device backend enabled")
self.assertGreaterEqual(num_devices, 1)

def test_device_id(self):
device_count = GetFlag("device_count")
if device_count == 0:
self.skipTest("no device detected")
if "KOKKOS_VISIBLE_DEVICES" in os.environ:
self.skipTest("KOKKOS_VISIBLE_DEVICES environment variable is set")
num_devices = GetFlag("num_devices")
if num_devices == -1:
self.assertEqual(-1, GetFlag("device_id"))
self.skipTest("no device backend enabled")
# by default use the first GPU available for execution
self.assertEqual(0, GetFlag("device_id"))
for device_id in range(device_count):
for device_id in range(num_devices):
self.assertEqual(
device_id,
GetFlag(
Expand Down
37 changes: 20 additions & 17 deletions core/unit_test/UnitTest_DeviceAndThreads.cpp
Expand Up @@ -19,22 +19,23 @@
#include <string>
#include <thread>

int get_device_count() {
int get_num_devices() {
int num_devices;
#if defined(KOKKOS_ENABLE_CUDA)
int count;
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaGetDeviceCount(&count));
return count;
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaGetDeviceCount(&num_devices));
#elif defined(KOKKOS_ENABLE_HIP)
int count;
KOKKOS_IMPL_HIP_SAFE_CALL(hipGetDevice(&count));
return count;
KOKKOS_IMPL_HIP_SAFE_CALL(hipGetDeviceCount(&num_devices));
#elif defined(KOKKOS_ENABLE_OPENMPTARGET)
return omp_get_num_devices();
num_devices = omp_get_num_devices();
#elif defined(KOKKOS_ENABLE_OPENACC)
return acc_get_num_devices(acc_get_device_type());
num_devices = acc_get_num_devices(acc_get_device_type());
#elif defined(KOKKOS_ENABLE_SYCL)
num_devices = sycl::device::get_devices(sycl::info::device_type::gpu).size();
#else
return 0;
num_devices = -1;
#endif
assert(num_devices == Kokkos::num_devices());
return num_devices;
}

int get_device_id() {
Expand All @@ -44,15 +45,17 @@ int get_device_id() {
#elif defined(KOKKOS_ENABLE_HIP)
KOKKOS_IMPL_HIP_SAFE_CALL(hipGetDevice(&device_id));
#elif defined(KOKKOS_ENABLE_OPENMPTARGET)
device_id = omp_get_device_num();
device_id = omp_get_device_num();
#elif defined(KOKKOS_ENABLE_OPENACC)
device_id = acc_get_device_num(acc_get_device_type());
device_id = acc_get_device_num(acc_get_device_type());
#elif defined(KOKKOS_ENABLE_SYCL)
// FIXME_SYCL ?
assert(false);
return -2;
// Not able to query the underlying runtime because there is no such thing as
// device currently being used with SYCL. We go through the Kokkos runtime
// which makes the assert below pointless but it still let us check that
// Kokkos selected the device we asked for from the Python tests.
device_id = Kokkos::device_id();
#else
device_id = -1;
device_id = -1;
#endif
assert(device_id == Kokkos::device_id());
return device_id;
Expand Down Expand Up @@ -98,7 +101,7 @@ int print_flag(std::string const& flag) {
KOKKOS_TEST_PRINT_FLAG(num_threads);
KOKKOS_TEST_PRINT_FLAG(max_threads);
KOKKOS_TEST_PRINT_FLAG(device_id);
KOKKOS_TEST_PRINT_FLAG(device_count);
KOKKOS_TEST_PRINT_FLAG(num_devices);
KOKKOS_TEST_PRINT_FLAG(disable_warnings);
KOKKOS_TEST_PRINT_FLAG(tune_internals);
KOKKOS_TEST_PRINT_FLAG(hwloc_enabled);
Expand Down

0 comments on commit d2913cb

Please sign in to comment.