Skip to content

Commit

Permalink
Merge pull request #6671 from rbberger/add_mi300_gfx940
Browse files Browse the repository at this point in the history
Add missing gfx940

(cherry picked from commit 64a7774)
  • Loading branch information
dalg24 authored and ndellingwood committed Jan 17, 2024
1 parent 474366a commit f53b18b
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 5 deletions.
9 changes: 8 additions & 1 deletion Makefile.kokkos
Expand Up @@ -13,7 +13,7 @@ KOKKOS_DEVICES ?= "Threads"
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
# IBM: BGQ,Power7,Power8,Power9
# AMD-GPUS: GFX906,GFX908,GFX90A,GFX942,GFX1030,GFX1100
# AMD-GPUS: GFX906,GFX908,GFX90A,GFX940,GFX942,GFX1030,GFX1100
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC
KOKKOS_ARCH ?= ""
Expand Down Expand Up @@ -406,6 +406,8 @@ endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA906),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906))
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA908),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX908))
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX90A))
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030))
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1100))

Expand Down Expand Up @@ -1103,6 +1105,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx90a
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX940")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx940
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX942")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
Expand Down
1 change: 1 addition & 0 deletions cmake/KokkosCore_config.h.in
Expand Up @@ -114,6 +114,7 @@
#cmakedefine KOKKOS_ARCH_AMD_GFX906
#cmakedefine KOKKOS_ARCH_AMD_GFX908
#cmakedefine KOKKOS_ARCH_AMD_GFX90A
#cmakedefine KOKKOS_ARCH_AMD_GFX940
#cmakedefine KOKKOS_ARCH_AMD_GFX942
#cmakedefine KOKKOS_ARCH_AMD_GFX1030
#cmakedefine KOKKOS_ARCH_AMD_GFX1100
Expand Down
6 changes: 3 additions & 3 deletions cmake/kokkos_arch.cmake
Expand Up @@ -94,9 +94,9 @@ IF(Kokkos_ENABLE_HIP OR Kokkos_ENABLE_OPENMPTARGET OR Kokkos_ENABLE_OPENACC OR K
ENDIF()

# AMD archs ordered in decreasing priority of autodetection
LIST(APPEND SUPPORTED_AMD_GPUS MI300)
LIST(APPEND SUPPORTED_AMD_ARCHS AMD_GFX942)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx942)
LIST(APPEND SUPPORTED_AMD_GPUS MI300 MI300)
LIST(APPEND SUPPORTED_AMD_ARCHS AMD_GFX942 AMD_GFX940)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx942 gfx940)
LIST(APPEND SUPPORTED_AMD_GPUS MI200 MI200 MI100 MI100)
LIST(APPEND SUPPORTED_AMD_ARCHS VEGA90A AMD_GFX90A VEGA908 AMD_GFX908)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx90a gfx90a gfx908 gfx908)
Expand Down
3 changes: 2 additions & 1 deletion core/src/HIP/Kokkos_HIP_Instance.hpp
Expand Up @@ -30,7 +30,8 @@ namespace Impl {

struct HIPTraits {
#if defined(KOKKOS_ARCH_AMD_GFX906) || defined(KOKKOS_ARCH_AMD_GFX908) || \
defined(KOKKOS_ARCH_AMD_GFX90A) || defined(KOKKOS_ARCH_AMD_GFX942)
defined(KOKKOS_ARCH_AMD_GFX90A) || defined(KOKKOS_ARCH_AMD_GFX940) || \
defined(KOKKOS_ARCH_AMD_GFX942)
static constexpr int WarpSize = 64;
static constexpr int WarpIndexMask = 0x003f; /* hexadecimal for 63 */
static constexpr int WarpIndexShift = 6; /* WarpSize == 1 << WarpShift*/
Expand Down
1 change: 1 addition & 0 deletions generate_makefile.bash
Expand Up @@ -160,6 +160,7 @@ display_help_text() {
echo " AMD_GFX906 = AMD GPU MI50/MI60 GFX906"
echo " AMD_GFX908 = AMD GPU MI100 GFX908"
echo " AMD_GFX90A = AMD GPU MI200 GFX90A"
echo " AMD_GFX940 = AMD GPU MI300 GFX940"
echo " AMD_GFX942 = AMD GPU MI300 GFX942"
echo " AMD_GFX1030 = AMD GPU V620/W6800 GFX1030"
echo " AMD_GFX1100 = AMD GPU RX 7900 XT(X) GFX1100"
Expand Down

0 comments on commit f53b18b

Please sign in to comment.