Skip to content

Commit

Permalink
OpenMPTarget: Changes for OpenMPTarget backend with nvhpc compiler.
Browse files Browse the repository at this point in the history
  • Loading branch information
Rahulkumar Gayatri committed May 30, 2023
1 parent ab6f756 commit bbd9a78
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 85 deletions.
175 changes: 91 additions & 84 deletions Makefile.kokkos
Original file line number Diff line number Diff line change
Expand Up @@ -265,11 +265,16 @@ else
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
endif
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL_CLANG), 1)
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fiopenmp -Wno-openmp-mapping
else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 0)
#Assume GCC
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none

ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL_CLANG), 1)
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fiopenmp -Wno-openmp-mapping
else ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 1)
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -mp=gpu
else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 0)
#Assume GCC
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none
endif
endif

ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)
Expand Down Expand Up @@ -970,86 +975,88 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
endif
endif

# Do not add this flag it its the cray compiler.
# Do not add this flag it its the cray compiler or the nvhpc compiler.
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY_CLANG), 0)
# Lets start with adding architecture defines
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER30")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER32")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER35")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER37")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL50")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL52")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL53")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL60")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL61")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA70), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA70")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_70
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA72), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA72")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_TURING75), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING75")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_75
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE80), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE80")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_80
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE86), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE86")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_86
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ADA89), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ADA89")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_89
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HOPPER90), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER90")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_90
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0)
# Lets start with adding architecture defines
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER30")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER32")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER35")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER37")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL50")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL52")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL53")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL60")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL61")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA70), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA70")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_70
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA72), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA72")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_TURING75), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING75")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_75
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE80), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE80")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_80
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE86), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE86")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_86
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ADA89), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ADA89")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_89
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HOPPER90), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER90")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_90
endif
endif
endif

Expand Down
2 changes: 1 addition & 1 deletion tpls/desul/include/desul/atomics/Thread_Fence_OpenMP.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ SPDX-License-Identifier: (BSD-3-Clause)
namespace desul {
namespace Impl {

#if _OPENMP > 201800
#if _OPENMP > 201800 && !defined(KOKKOS_COMPILER_NVHPC)

// There is no seq_cst flush in OpenMP, isn't it the same anyway for fence?
inline void host_atomic_thread_fence(MemoryOrderSeqCst, MemoryScopeCore) {
Expand Down

0 comments on commit bbd9a78

Please sign in to comment.