Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[OpenMP] Initial implementation of OpenMP offloading library - libomp…
…target device RTLs. This patch implements the device runtime library whose interface is used in the code generation for OpenMP offloading devices. Currently there is a single device RTL, written in CUDA and meant for CUDA-enabled GPUs. The interface is a variation of the kmpc interface that includes some extra calls to do thread and storage management that only make sense for a GPU target. Differential revision: https://reviews.llvm.org/D14254 llvm-svn: 323649
- Loading branch information
George Rokos
committed
Jan 29, 2018
1 parent
e726410
commit 0dd6ed7
Showing
27 changed files
with
5,897 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
##===----------------------------------------------------------------------===## | ||
# | ||
# The LLVM Compiler Infrastructure | ||
# | ||
# This file is dual licensed under the MIT and the University of Illinois Open | ||
# Source Licenses. See LICENSE.txt for details. | ||
# | ||
##===----------------------------------------------------------------------===## | ||
# | ||
# Build a device RTL for each available machine. | ||
# | ||
##===----------------------------------------------------------------------===## | ||
|
||
# Add the nvptx subdirectory to the build; it is the only device RTL
# directory added by this file.
add_subdirectory(nvptx) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,200 @@ | ||
##===----------------------------------------------------------------------===## | ||
# | ||
# The LLVM Compiler Infrastructure | ||
# | ||
# This file is dual licensed under the MIT and the University of Illinois Open | ||
# Source Licenses. See LICENSE.txt for details. | ||
# | ||
##===----------------------------------------------------------------------===## | ||
# | ||
# Build the NVPTX (CUDA) Device RTL if the CUDA tools are available | ||
# | ||
##===----------------------------------------------------------------------===## | ||
|
||
# Optional user override for the host compiler used by NVCC. When set, the
# value is resolved with find_program() and forced into CUDA_HOST_COMPILER,
# replacing whatever value is already in the cache.
set(LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER "" CACHE STRING
  "Path to alternate NVCC host compiler to be used by the NVPTX device RTL.")

if(LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER)
  find_program(ALTERNATE_CUDA_HOST_COMPILER NAMES ${LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER})
  if(NOT ALTERNATE_CUDA_HOST_COMPILER)
    libomptarget_say("Not building CUDA offloading device RTL: invalid NVPTX alternate host compiler.")
    # Stop processing this directory, as the message says. Falling through
    # would force CUDA_HOST_COMPILER to a "-NOTFOUND" value and still attempt
    # to build the device RTL.
    return()
  endif()
  set(CUDA_HOST_COMPILER "${ALTERNATE_CUDA_HOST_COMPILER}" CACHE FILEPATH "" FORCE)
endif()
|
||
# We can't use clang as nvcc host preprocessor, so when the selected host
# compiler matches "clang" we attempt to replace it with the first gcc found
# in the path. If no gcc is available, the device RTL is not built.
if(CUDA_HOST_COMPILER MATCHES clang)

  find_program(LIBOMPTARGET_NVPTX_ALTERNATE_GCC_HOST_COMPILER NAMES gcc)

  if(NOT LIBOMPTARGET_NVPTX_ALTERNATE_GCC_HOST_COMPILER)
    libomptarget_say("Not building CUDA offloading device RTL: clang is not supported as NVCC host compiler.")
    libomptarget_say("Please include gcc in your path or set LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER to the full path of a valid compiler.")
    return()
  endif()

  # Override the cached host compiler with the gcc that was found.
  set(CUDA_HOST_COMPILER "${LIBOMPTARGET_NVPTX_ALTERNATE_GCC_HOST_COMPILER}" CACHE FILEPATH "" FORCE)
endif()
|
||
if(LIBOMPTARGET_DEP_CUDA_FOUND) | ||
# The device RTL contains no host code, so there is no need to propagate host
# compiler flags.
set(CUDA_PROPAGATE_HOST_FLAGS OFF)

libomptarget_say("Building CUDA offloading device RTL.")

# OpenMP data objects, kept in a list separate from cuda_src_files.
set(omp_data_objects
  src/omp_data.cu
)

# CUDA sources of the device RTL.
set(cuda_src_files
  src/cancel.cu
  src/critical.cu
  src/data_sharing.cu
  src/libcall.cu
  src/loop.cu
  src/omptarget-nvptx.cu
  src/parallel.cu
  src/reduction.cu
  src/sync.cu
  src/task.cu
)
|
||
# Get the compute capability the user requested or use SM_35 by default.
# SM_35 is what clang uses by default.
set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY 35 CACHE STRING
  "CUDA Compute Capability to be used to compile the NVPTX device RTL.")
set(CUDA_ARCH -arch sm_${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})

# Activate RTL message dumps if requested by the user.
set(LIBOMPTARGET_NVPTX_DEBUG FALSE CACHE BOOL
  "Activate NVPTX device RTL debug messages.")
# Test the variable by name: if() dereferences it itself. Expanding it first
# makes if() evaluate the expanded text a second time.
if(LIBOMPTARGET_NVPTX_DEBUG)
  set(CUDA_DEBUG -DOMPTARGET_NVPTX_DEBUG=-1 -g --ptxas-options=-v)
endif()
|
||
# The CUDA toolchain does not yet support dynamic linking on the device, so
# the NVPTX runtime library is always built as a static library, using
# separable compilation.
set(CUDA_SEPARABLE_COMPILATION ON)
set(BUILD_SHARED_LIBS OFF)

cuda_add_library(omptarget-nvptx STATIC
  ${cuda_src_files}
  ${omp_data_objects}
  OPTIONS ${CUDA_ARCH} ${CUDA_DEBUG})

# Keyword-less signature kept on purpose.
# NOTE(review): cuda_add_library presumably links with the plain signature as
# well, and CMake rejects mixing plain and keyword forms on one target; confirm
# before adding PRIVATE/PUBLIC here.
target_link_libraries(omptarget-nvptx ${CUDA_LIBRARIES})

# The static device RTL is installed under the lib destination folder.
install(TARGETS omptarget-nvptx ARCHIVE DESTINATION "lib")
|
||
# Check if we can create an LLVM bitcode implementation of the runtime library
# that could be inlined in the user implementation. Disabled by default.
set(LIBOMPTARGET_NVPTX_ENABLE_BCLIB FALSE CACHE BOOL
  "Enable CUDA LLVM bitcode offloading device RTL.")
# Test the option by name; if() dereferences the variable itself.
if(LIBOMPTARGET_NVPTX_ENABLE_BCLIB)

  # Find a clang compiler capable of compiling cuda files to LLVM bitcode and
  # an LLVM linker.
  # We use the one provided by the user, attempt to use the one used to build
  # libomptarget or just fail.
  set(LIBOMPTARGET_NVPTX_CUDA_COMPILER "" CACHE STRING
    "Location of a CUDA compiler capable of emitting LLVM bitcode.")
  set(LIBOMPTARGET_NVPTX_BC_LINKER "" CACHE STRING
    "Location of a linker capable of linking LLVM bitcode objects.")

  # The compiler id is quoted so that an empty value still yields a
  # well-formed comparison.
  if(NOT LIBOMPTARGET_NVPTX_CUDA_COMPILER STREQUAL "")
    set(LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER ${LIBOMPTARGET_NVPTX_CUDA_COMPILER})
  elseif("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
    set(LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER ${CMAKE_C_COMPILER})
  else()
    libomptarget_error_say("Cannot find a CUDA compiler capable of emitting LLVM bitcode.")
    libomptarget_error_say("Please configure with flag -DLIBOMPTARGET_NVPTX_CUDA_COMPILER")
  endif()

  # Get compiler directory to try to locate a suitable linker.
  get_filename_component(COMPILER_DIR "${CMAKE_C_COMPILER}" DIRECTORY)

  if(NOT LIBOMPTARGET_NVPTX_BC_LINKER STREQUAL "")
    set(LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER ${LIBOMPTARGET_NVPTX_BC_LINKER})
  elseif("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" AND EXISTS "${COMPILER_DIR}/llvm-link")
    # Use llvm-link from the directory containing clang.
    set(LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER ${COMPILER_DIR}/llvm-link)
  else()
    libomptarget_error_say("Cannot find a linker capable of linking LLVM bitcode objects.")
    libomptarget_error_say("Please configure with flag -DLIBOMPTARGET_NVPTX_BC_LINKER")
  endif()

  if(LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER AND LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER)
    libomptarget_say("Building CUDA LLVM bitcode offloading device RTL.")

    # Decide which ptx version to use. Same choices as Clang: ptx60 for
    # CUDA 9 and newer, ptx42 otherwise.
    if(NOT CUDA_VERSION_MAJOR LESS 9)
      set(CUDA_PTX_VERSION ptx60)
    else()
      set(CUDA_PTX_VERSION ptx42)
    endif()

    # Set flags for Clang cuda compilation. Only Clang is supported because
    # there is no other compiler capable of generating bitcode from cuda
    # sources.
    set(CUDA_FLAGS
      -emit-llvm
      -O1
      -Xclang -target-feature
      -Xclang +${CUDA_PTX_VERSION}
      --cuda-device-only
      -DOMPTARGET_NVPTX_TEST=0 -DOMPTARGET_NVPTX_DEBUG=0
    )

    # CUDA 9 header files use the nv_weak attribute which clang is not yet
    # prepared to handle. Therefore, we use 'weak' instead. We are compiling
    # only for the device, so it should be equivalent.
    if(CUDA_VERSION_MAJOR EQUAL 9)
      list(APPEND CUDA_FLAGS -Dnv_weak=weak)
    endif()

    # Architecture flag in clang syntax, using the compute capability the user
    # requested (SM_35 by default). This replaces the nvcc-style value set
    # for the static library above.
    set(CUDA_ARCH --cuda-gpu-arch=sm_${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})

    # Compile cuda files to bitcode, one custom command per source file.
    # NOTE(review): CUDA_INCLUDES is not set in this file; presumably it is
    # provided by an including scope - confirm.
    set(bc_files "")
    foreach(src ${cuda_src_files})
      get_filename_component(infile ${src} ABSOLUTE)
      get_filename_component(outfile ${src} NAME)

      add_custom_command(OUTPUT ${outfile}.bc
        COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${CUDA_FLAGS} ${CUDA_ARCH} ${CUDA_INCLUDES}
          -c ${infile} -o ${outfile}.bc
        DEPENDS ${infile}
        IMPLICIT_DEPENDS CXX ${infile}
        COMMENT "Building LLVM bitcode ${outfile}.bc"
        VERBATIM
      )
      set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile}.bc)

      list(APPEND bc_files ${outfile}.bc)
    endforeach()

    # Link to a bitcode library.
    set(bclib_file ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc)
    add_custom_command(OUTPUT ${bclib_file}
      COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER}
        -o ${bclib_file} ${bc_files}
      DEPENDS ${bc_files}
      COMMENT "Linking LLVM bitcode libomptarget-nvptx.bc"
      VERBATIM
    )
    set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES libomptarget-nvptx.bc)

    add_custom_target(omptarget-nvptx-bc ALL DEPENDS ${bclib_file})

    # Copy library to destination: the directory holding the static device
    # RTL.
    add_custom_command(TARGET omptarget-nvptx-bc POST_BUILD
      COMMAND ${CMAKE_COMMAND} -E copy ${bclib_file}
        $<TARGET_FILE_DIR:omptarget-nvptx>)

    # Install device RTL under the lib destination folder.
    install(FILES ${bclib_file} DESTINATION "lib")

  endif()
endif()
|
||
else() | ||
libomptarget_say("Not building CUDA offloading device RTL: CUDA tools not found in the system.") | ||
endif() |
Oops, something went wrong.