Skip to content

Commit

Permalink
Rename AMD GPU architectures (kokkos#6266)
Browse files Browse the repository at this point in the history
* Rename AMD GPU architectures

* Use new AMD gpus macros in OpenACC and OpenMPTarget
  • Loading branch information
Rombur committed Aug 9, 2023
1 parent 3a58c3a commit 7e91f11
Show file tree
Hide file tree
Showing 14 changed files with 112 additions and 61 deletions.
2 changes: 1 addition & 1 deletion .jenkins
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ pipeline {
-DKokkos_ENABLE_BENCHMARKS=ON \
-DKokkos_ENABLE_OPENMPTARGET=ON \
-DKokkos_ENABLE_OPENMP=ON \
-DKokkos_ARCH_VEGA906=ON \
-DKokkos_ARCH_AMD_GFX906=ON \
&& \
cmake --build build --parallel ${BUILD_JOBS} && \
cd build && ctest --output-on-failure
Expand Down
41 changes: 21 additions & 20 deletions Makefile.kokkos
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ KOKKOS_DEVICES ?= "Threads"
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
# IBM: BGQ,Power7,Power8,Power9
# AMD-GPUS: Vega906,Vega908,Vega90A,Navi1030
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX1030,AMD_GFX1100
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC
KOKKOS_ARCH ?= ""
Expand Down Expand Up @@ -402,10 +402,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0)
KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen)
endif
endif
KOKKOS_INTERNAL_USE_ARCH_VEGA906 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega906)
KOKKOS_INTERNAL_USE_ARCH_VEGA908 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega908)
KOKKOS_INTERNAL_USE_ARCH_VEGA90A := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega90A)
KOKKOS_INTERNAL_USE_ARCH_NAVI1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),Navi1030)
# AMD GPU architecture detection. Each variable is 1 when KOKKOS_ARCH names the
# architecture either by its deprecated name (VEGA*/NAVI*) or by its new
# AMD_GFX* name, and 0 otherwise.
#
# Do NOT combine the two lookups with $(or ...): $(or) returns its first
# NON-EMPTY argument, and the lookup results are compared against the literal
# strings 1 and 0 elsewhere in this file, so a "0" from the legacy-name lookup
# would short-circuit and hide a match on the new name. Collect both results
# and test for a literal 1 instead.
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(if $(filter 1,$(call kokkos_has_string,$(KOKKOS_ARCH),VEGA906) $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906)),1,0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(if $(filter 1,$(call kokkos_has_string,$(KOKKOS_ARCH),VEGA908) $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX908)),1,0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(if $(filter 1,$(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A) $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX90A)),1,0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(if $(filter 1,$(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030) $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030)),1,0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(if $(filter 1,$(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100) $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1100)),1,0)

# Any AVX?
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
Expand Down Expand Up @@ -1075,29 +1076,29 @@ endif


# Figure out the architecture flag for ROCm.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA906")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA")
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX906")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx906
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA908), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA908")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA")
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX908")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx908
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA90A), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA90A")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA")
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX90A")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx90a
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NAVI1030), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI1030")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI")
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1030")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1030
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NAVI1100), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI1100")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI")
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1100")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100
endif

Expand Down
20 changes: 13 additions & 7 deletions cmake/KokkosCore_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,18 @@
#cmakedefine KOKKOS_ARCH_AMD_ZEN
#cmakedefine KOKKOS_ARCH_AMD_ZEN2
#cmakedefine KOKKOS_ARCH_AMD_ZEN3
#cmakedefine KOKKOS_ARCH_VEGA
#cmakedefine KOKKOS_ARCH_VEGA906
#cmakedefine KOKKOS_ARCH_VEGA908
#cmakedefine KOKKOS_ARCH_VEGA90A
#cmakedefine KOKKOS_ARCH_NAVI
#cmakedefine KOKKOS_ARCH_NAVI1030
#cmakedefine KOKKOS_ARCH_NAVI1100
#cmakedefine KOKKOS_ARCH_AMD_GFX906
#cmakedefine KOKKOS_ARCH_AMD_GFX908
#cmakedefine KOKKOS_ARCH_AMD_GFX90A
#cmakedefine KOKKOS_ARCH_AMD_GFX1030
#cmakedefine KOKKOS_ARCH_AMD_GFX1100
#cmakedefine KOKKOS_ARCH_AMD_GPU
#cmakedefine KOKKOS_ARCH_VEGA // deprecated
#cmakedefine KOKKOS_ARCH_VEGA906 // deprecated
#cmakedefine KOKKOS_ARCH_VEGA908 // deprecated
#cmakedefine KOKKOS_ARCH_VEGA90A // deprecated
#cmakedefine KOKKOS_ARCH_NAVI // deprecated
#cmakedefine KOKKOS_ARCH_NAVI1030 // deprecated
#cmakedefine KOKKOS_ARCH_NAVI1100 // deprecated

#cmakedefine KOKKOS_IMPL_32BIT
48 changes: 45 additions & 3 deletions cmake/kokkos_arch.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,15 @@ IF(Kokkos_ENABLE_HIP OR Kokkos_ENABLE_OPENMPTARGET OR Kokkos_ENABLE_OPENACC OR K
ENDIF()

# AMD archs ordered in decreasing priority of autodetection
LIST(APPEND SUPPORTED_AMD_GPUS MI200 MI100 MI50/60 RX7900XTX V620/W6800)
LIST(APPEND SUPPORTED_AMD_ARCHS VEGA90A VEGA908 VEGA906 NAVI1100 NAVI1030)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx90a gfx908 gfx906 gfx1100 gfx1030)
# The three lists below are built in lock-step: entry i of SUPPORTED_AMD_GPUS,
# SUPPORTED_AMD_ARCHS and CORRESPONDING_AMD_FLAGS describe the same device.
# Every device appears twice, once under its deprecated VEGA*/NAVI* arch name
# and once under the new AMD_GFX* name, both mapping to the same gfx flag.
# Keep the three APPEND calls of each group the same length when editing.
LIST(APPEND SUPPORTED_AMD_GPUS MI200 MI200 MI100 MI100)
LIST(APPEND SUPPORTED_AMD_ARCHS VEGA90A AMD_GFX90A VEGA908 AMD_GFX908)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx90a gfx90a gfx908 gfx908)
LIST(APPEND SUPPORTED_AMD_GPUS MI50/60 MI50/60)
LIST(APPEND SUPPORTED_AMD_ARCHS VEGA906 AMD_GFX906)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx906 gfx906)
LIST(APPEND SUPPORTED_AMD_GPUS RX7900XTX RX7900XTX V620/W6800 V620/W6800)
LIST(APPEND SUPPORTED_AMD_ARCHS NAVI1100 AMD_GFX1100 NAVI1030 AMD_GFX1030)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx1100 gfx1100 gfx1030 gfx1030)

#FIXME CAN BE REPLACED WITH LIST_ZIP IN CMAKE 3.17
FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS)
Expand Down Expand Up @@ -948,8 +954,44 @@ IF(KOKKOS_ENABLE_HIP AND NOT AMDGPU_ARCH_ALREADY_SPECIFIED)
ENDIF()
ENDIF()

# Keep the new (AMD_GFX*) and deprecated (VEGA*/NAVI*) architecture variables
# in sync: for the first enabled entry of SUPPORTED_AMD_ARCHS, identify the
# chip from the numeric suffix of its name and switch ON both spellings, then
# stop. Only the first enabled architecture in list order is handled.
FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS)
IF (KOKKOS_ARCH_${ARCH})
# gfx90a: matches both VEGA90A and AMD_GFX90A
STRING(REGEX MATCH "90A" IS_90A ${ARCH})
IF(IS_90A)
SET(KOKKOS_ARCH_AMD_GFX90A ON)
SET(KOKKOS_ARCH_VEGA90A ON)
BREAK()
ENDIF()
# gfx908: matches both VEGA908 and AMD_GFX908
STRING(REGEX MATCH "908" IS_908 ${ARCH})
IF(IS_908)
SET(KOKKOS_ARCH_AMD_GFX908 ON)
SET(KOKKOS_ARCH_VEGA908 ON)
BREAK()
ENDIF()
# gfx906: matches both VEGA906 and AMD_GFX906
STRING(REGEX MATCH "906" IS_906 ${ARCH})
IF(IS_906)
SET(KOKKOS_ARCH_AMD_GFX906 ON)
SET(KOKKOS_ARCH_VEGA906 ON)
BREAK()
ENDIF()
# gfx1100: matches both NAVI1100 and AMD_GFX1100
STRING(REGEX MATCH "1100" IS_1100 ${ARCH})
IF(IS_1100)
SET(KOKKOS_ARCH_AMD_GFX1100 ON)
SET(KOKKOS_ARCH_NAVI1100 ON)
BREAK()
ENDIF()
# gfx1030: matches both NAVI1030 and AMD_GFX1030
STRING(REGEX MATCH "1030" IS_1030 ${ARCH})
IF(IS_1030)
SET(KOKKOS_ARCH_AMD_GFX1030 ON)
SET(KOKKOS_ARCH_NAVI1030 ON)
BREAK()
ENDIF()
ENDIF()
ENDFOREACH()

#Regardless of version, make sure we define the general architecture name
FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS)
IF (KOKKOS_ARCH_${ARCH})
# Flag a generic AMD GPU only when one of the supported AMD architectures is
# actually enabled. Setting it before this check would turn it ON on the first
# loop iteration for every build, including builds with no AMD architecture.
SET(KOKKOS_ARCH_AMD_GPU ON)
STRING(REGEX MATCH "(VEGA)" IS_VEGA ${ARCH})
IF(IS_VEGA)
Expand Down
5 changes: 3 additions & 2 deletions core/src/HIP/Kokkos_HIP_Instance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,12 @@ namespace Kokkos {
namespace Impl {

struct HIPTraits {
#if defined(KOKKOS_ARCH_VEGA)
#if defined(KOKKOS_ARCH_AMD_GFX906) || defined(KOKKOS_ARCH_AMD_GFX908) || \
defined(KOKKOS_ARCH_AMD_GFX90A)
static int constexpr WarpSize = 64;
static int constexpr WarpIndexMask = 0x003f; /* hexadecimal for 63 */
static int constexpr WarpIndexShift = 6; /* WarpSize == 1 << WarpShift*/
#elif defined(KOKKOS_ARCH_NAVI)
#elif defined(KOKKOS_ARCH_AMD_GFX1030) || defined(KOKKOS_ARCH_AMD_GFX1100)
static int constexpr WarpSize = 32;
static int constexpr WarpIndexMask = 0x001f; /* hexadecimal for 31 */
static int constexpr WarpIndexShift = 5; /* WarpSize == 1 << WarpShift*/
Expand Down
2 changes: 1 addition & 1 deletion core/src/OpenACC/Kokkos_OpenACC_Traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ struct OpenACC_Traits {
#if defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU)
static constexpr acc_device_t dev_type = acc_device_nvidia;
static constexpr bool may_fallback_to_host = false;
#elif defined(KOKKOS_ARCH_VEGA) || defined(KOKKOS_ARCH_NAVI)
#elif defined(KOKKOS_ARCH_AMD_GPU)
static constexpr acc_device_t dev_type = acc_device_radeon;
static constexpr bool may_fallback_to_host = false;
#else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,8 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>,
if (i > 0) {
local_offset_value = element_values(team_id, i - 1);
// FIXME_OPENMPTARGET We seem to access memory illegally on AMD GPUs
#ifdef KOKKOS_ARCH_VEGA
#if defined(KOKKOS_ARCH_AMD_GPU) && !defined(KOKKOS_ARCH_AMD_GFX1030) && \
!defined(KOKKOS_ARCH_AMD_GFX1100)
if constexpr (Analysis::Reducer::has_join_member_function()) {
if constexpr (std::is_void_v<WorkTag>)
a_functor_reducer.get_functor().join(local_offset_value,
Expand Down
25 changes: 13 additions & 12 deletions core/src/impl/Kokkos_Core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -754,20 +754,21 @@ void pre_initialize_internal(const Kokkos::InitializationSettings& settings) {
#elif defined(KOKKOS_ARCH_HOPPER90)
declare_configuration_metadata("architecture", "GPU architecture",
"HOPPER90");
#elif defined(KOKKOS_ARCH_VEGA900)
declare_configuration_metadata("architecture", "GPU architecture", "VEGA900");
#elif defined(KOKKOS_ARCH_VEGA906)
declare_configuration_metadata("architecture", "GPU architecture", "VEGA906");
#elif defined(KOKKOS_ARCH_VEGA908)
declare_configuration_metadata("architecture", "GPU architecture", "VEGA908");
#elif defined(KOKKOS_ARCH_VEGA90A)
declare_configuration_metadata("architecture", "GPU architecture", "VEGA90A");
#elif defined(KOKKOS_ARCH_NAVI1030)
#elif defined(KOKKOS_ARCH_AMD_GFX906)
declare_configuration_metadata("architecture", "GPU architecture",
"NAVI1030");
#elif defined(KOKKOS_ARCH_NAVI1100)
"AMD_GFX906");
#elif defined(KOKKOS_ARCH_AMD_GFX908)
declare_configuration_metadata("architecture", "GPU architecture",
"NAVI1100");
"AMD_GFX908");
#elif defined(KOKKOS_ARCH_AMD_GFX90A)
declare_configuration_metadata("architecture", "GPU architecture",
"AMD_GFX90A");
#elif defined(KOKKOS_ARCH_AMD_GFX1030)
declare_configuration_metadata("architecture", "GPU architecture",
"AMD_GFX1030");
#elif defined(KOKKOS_ARCH_AMD_GFX1100)
declare_configuration_metadata("architecture", "GPU architecture",
"AMD_GFX1100");

#else
declare_configuration_metadata("architecture", "GPU architecture", "none");
Expand Down
4 changes: 2 additions & 2 deletions core/src/impl/Kokkos_SharedAlloc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class SharedAllocationHeader {
private:
using Record = SharedAllocationRecord<void, void>;

#if defined(KOKKOS_ARCH_VEGA) || defined(KOKKOS_ARCH_NAVI)
#if defined(KOKKOS_ARCH_AMD_GPU)
static constexpr unsigned maximum_label_length =
(1u << 8 /* 256 */) - sizeof(Record*);
#else
Expand Down Expand Up @@ -70,7 +70,7 @@ class SharedAllocationHeader {
template <>
class SharedAllocationRecord<void, void> {
protected:
#if defined(KOKKOS_ARCH_VEGA) || defined(KOKKOS_ARCH_NAVI)
#if defined(KOKKOS_ARCH_AMD_GPU)
static_assert(sizeof(SharedAllocationHeader) == (1u << 8 /* 256 */),
"sizeof(SharedAllocationHeader) != 256");
#else
Expand Down
2 changes: 1 addition & 1 deletion core/unit_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ if(Kokkos_ENABLE_OPENACC)
endif()

# FIXME_OPENMPTARGET - Comment non-passing tests with amdclang++
# FIXME_OPENMPTARGET - Need to check on NAVI architecture
# FIXME_OPENMPTARGET - Need to check on GFX1030 and GFX1100 architectures
IF(KOKKOS_ARCH_VEGA)
SET(KOKKOS_AMDGPU_ARCH TRUE)
ENDIF()
Expand Down
6 changes: 3 additions & 3 deletions core/unit_test/TestMathematicalFunctions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -793,9 +793,9 @@ TEST(TEST_CATEGORY, mathematical_functions_exponential_functions) {
#endif

// FIXME_OPENMPTARGET FIXME_AMD
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && \
(defined(KOKKOS_ARCH_VEGA906) || defined(KOKKOS_ARCH_VEGA908) || \
defined(KOKKOS_ARCH_VEGA90A))
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && \
(defined(KOKKOS_ARCH_AMD_GFX906) || defined(KOKKOS_ARCH_AMD_GFX908) || \
defined(KOKKOS_ARCH_AMD_GFX90A))

TEST_MATH_FUNCTION(log2)({1, 23, 456, 7890});
#endif
Expand Down
3 changes: 1 addition & 2 deletions core/unit_test/TestSharedSpace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,7 @@ TEST(defaultdevicetype, shared_space) {
Kokkos::DefaultHostExecutionSpace>)
GTEST_SKIP() << "Skipping as host and device are the same space";

#if defined(KOKKOS_ARCH_VEGA906) || defined(KOKKOS_ARCH_VEGA908) || \
defined(KOKKOS_ARCH_NAVI)
#if defined(KOKKOS_ARCH_AMD_GPU) && !defined(KOKKOS_ARCH_AMD_GFX90A)
GTEST_SKIP()
<< "skipping because specified arch does not support page migration";
#endif
Expand Down
10 changes: 5 additions & 5 deletions generate_makefile.bash
Original file line number Diff line number Diff line change
Expand Up @@ -157,11 +157,11 @@ display_help_text() {
echo " ZEN2 = AMD Zen2-Core CPU"
echo " ZEN3 = AMD Zen3-Core CPU"
echo " [AMD: GPU]"
echo " VEGA906 = AMD GPU MI50/MI60 GFX906"
echo " VEGA908 = AMD GPU MI100 GFX908"
echo " VEGA90A = AMD GPU MI200 GFX90A"
echo " NAVI1030 = AMD GPU V620/W6800 GFX1030"
echo " NAVI1100 = AMD GPU RX 7900 XT(X) GFX1100"
echo " AMD_GFX906 = AMD GPU MI50/MI60 GFX906"
echo " AMD_GFX908 = AMD GPU MI100 GFX908"
echo " AMD_GFX90A = AMD GPU MI200 GFX90A"
echo " AMD_GFX1030 = AMD GPU V620/W6800 GFX1030"
echo " AMD_GFX1100 = AMD GPU RX 7900 XT(X) GFX1100"
echo " [ARM]"
echo " ARMV80 = ARMv8.0 Compatible CPU"
echo " ARMV81 = ARMv8.1 Compatible CPU"
Expand Down
2 changes: 1 addition & 1 deletion scripts/testing_scripts/test_all_sandia
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ elif [ "$MACHINE" = "caraway" ]; then
)

if [ -z "$ARCH_FLAG" ]; then
ARCH_FLAG="--arch=VEGA908"
ARCH_FLAG="--arch=AMD_GFX908"
fi

elif [ "$MACHINE" = "blake" ]; then
Expand Down

0 comments on commit 7e91f11

Please sign in to comment.