Add AMD GFX940 (MI300) as a selectable HIP architecture.

Summary of the hunks below:
  * Makefile.kokkos: detect AMD_GFX940 and AMD_GFX942 in KOKKOS_ARCH; for
    GFX940 append the KOKKOS_ARCH_AMD_GFX940 and KOKKOS_ARCH_AMD_GPU defines
    and select --offload-arch=gfx940.
  * cmake/KokkosCore_config.h.in: new cmakedefine KOKKOS_ARCH_AMD_GFX940.
  * cmake/kokkos_arch.cmake: second MI300 entry (AMD_GFX940 / gfx940), listed
    after AMD_GFX942 in the priority-ordered lists.
  * core/src/HIP/Kokkos_HIP_Instance.hpp: GFX940 joins the HIPTraits branch
    that sets WarpSize = 64.
  * generate_makefile.bash: help text entry for AMD_GFX940.

NOTE(review): the line structure of this patch was restored from a copy whose
whitespace had been collapsed. Hunk line counts were checked against the @@
headers, but runs of spaces inside lines (indentation, column alignment) are
reconstructed and may not match the target tree byte-for-byte; if context
fails to match, retry with `git apply --ignore-whitespace`.

NOTE(review): the Makefile.kokkos header comment lists bare GFX9xx names,
while the matching code and generate_makefile.bash use the AMD_-prefixed
spelling (AMD_GFX940). Consider aligning the comment in a follow-up.

diff --git a/Makefile.kokkos b/Makefile.kokkos
index 7137ec3936c..458e4e29215 100644
--- a/Makefile.kokkos
+++ b/Makefile.kokkos
@@ -13,7 +13,7 @@ KOKKOS_DEVICES ?= "Threads"
 # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
 # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
 # IBM: BGQ,Power7,Power8,Power9
-# AMD-GPUS: GFX906,GFX908,GFX90A,GFX942,GFX1030,GFX1100
+# AMD-GPUS: GFX906,GFX908,GFX90A,GFX940,GFX942,GFX1030,GFX1100
 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
 # Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC
 KOKKOS_ARCH ?= ""
@@ -406,6 +406,8 @@ endif
 KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA906),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906))
 KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA908),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX908))
 KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX90A))
+KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940)
+KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942)
 KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030))
 KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1100))
 
@@ -1103,6 +1105,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 1)
   tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
   KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx90a
 endif
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940), 1)
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX940")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
+  KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx940
+endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942), 1)
   tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX942")
   tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
diff --git a/cmake/KokkosCore_config.h.in b/cmake/KokkosCore_config.h.in
index bec59ebd034..9930d2abf0f 100644
--- a/cmake/KokkosCore_config.h.in
+++ b/cmake/KokkosCore_config.h.in
@@ -114,6 +114,7 @@
 #cmakedefine KOKKOS_ARCH_AMD_GFX906
 #cmakedefine KOKKOS_ARCH_AMD_GFX908
 #cmakedefine KOKKOS_ARCH_AMD_GFX90A
+#cmakedefine KOKKOS_ARCH_AMD_GFX940
 #cmakedefine KOKKOS_ARCH_AMD_GFX942
 #cmakedefine KOKKOS_ARCH_AMD_GFX1030
 #cmakedefine KOKKOS_ARCH_AMD_GFX1100
diff --git a/cmake/kokkos_arch.cmake b/cmake/kokkos_arch.cmake
index bccf674d763..30764bde860 100644
--- a/cmake/kokkos_arch.cmake
+++ b/cmake/kokkos_arch.cmake
@@ -94,9 +94,9 @@ IF(Kokkos_ENABLE_HIP OR Kokkos_ENABLE_OPENMPTARGET OR Kokkos_ENABLE_OPENACC OR K
 ENDIF()
 
 # AMD archs ordered in decreasing priority of autodetection
-LIST(APPEND SUPPORTED_AMD_GPUS MI300)
-LIST(APPEND SUPPORTED_AMD_ARCHS AMD_GFX942)
-LIST(APPEND CORRESPONDING_AMD_FLAGS gfx942)
+LIST(APPEND SUPPORTED_AMD_GPUS MI300 MI300)
+LIST(APPEND SUPPORTED_AMD_ARCHS AMD_GFX942 AMD_GFX940)
+LIST(APPEND CORRESPONDING_AMD_FLAGS gfx942 gfx940)
 LIST(APPEND SUPPORTED_AMD_GPUS MI200 MI200 MI100 MI100)
 LIST(APPEND SUPPORTED_AMD_ARCHS VEGA90A AMD_GFX90A VEGA908 AMD_GFX908)
 LIST(APPEND CORRESPONDING_AMD_FLAGS gfx90a gfx90a gfx908 gfx908)
diff --git a/core/src/HIP/Kokkos_HIP_Instance.hpp b/core/src/HIP/Kokkos_HIP_Instance.hpp
index ef140ec46c0..63ad66686bb 100644
--- a/core/src/HIP/Kokkos_HIP_Instance.hpp
+++ b/core/src/HIP/Kokkos_HIP_Instance.hpp
@@ -30,7 +30,8 @@ namespace Impl {
 
 struct HIPTraits {
 #if defined(KOKKOS_ARCH_AMD_GFX906) || defined(KOKKOS_ARCH_AMD_GFX908) || \
-    defined(KOKKOS_ARCH_AMD_GFX90A) || defined(KOKKOS_ARCH_AMD_GFX942)
+    defined(KOKKOS_ARCH_AMD_GFX90A) || defined(KOKKOS_ARCH_AMD_GFX940) || \
+    defined(KOKKOS_ARCH_AMD_GFX942)
   static constexpr int WarpSize = 64;
   static constexpr int WarpIndexMask = 0x003f; /* hexadecimal for 63 */
   static constexpr int WarpIndexShift = 6; /* WarpSize == 1 << WarpShift*/
diff --git a/generate_makefile.bash b/generate_makefile.bash
index 1b216d9fe35..301a1fceb5a 100755
--- a/generate_makefile.bash
+++ b/generate_makefile.bash
@@ -160,6 +160,7 @@ display_help_text() {
       echo " AMD_GFX906 = AMD GPU MI50/MI60 GFX906"
       echo " AMD_GFX908 = AMD GPU MI100 GFX908"
       echo " AMD_GFX90A = AMD GPU MI200 GFX90A"
+      echo " AMD_GFX940 = AMD GPU MI300 GFX940"
       echo " AMD_GFX942 = AMD GPU MI300 GFX942"
       echo " AMD_GFX1030 = AMD GPU V620/W6800 GFX1030"
       echo " AMD_GFX1100 = AMD GPU RX 7900 XT(X) GFX1100"