From ae63b1a5767b89fe5af140365f9e3ccf74feb1f0 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 22 Mar 2023 19:58:08 -0500 Subject: [PATCH] [libc] Adjust NVPTX startup code Summary: The startup code needs to include the environment pointer, so we add it to the arguments. Also, we need to ensure that the `crt1.o` file is built with `-fgpu-rdc` set so we can actually use it without undefined reference errors. --- libc/startup/gpu/nvptx/CMakeLists.txt | 7 ++++++- libc/startup/gpu/nvptx/start.cpp | 7 ++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/libc/startup/gpu/nvptx/CMakeLists.txt b/libc/startup/gpu/nvptx/CMakeLists.txt index f7f58ec702bf2..96ab7540cedb1 100644 --- a/libc/startup/gpu/nvptx/CMakeLists.txt +++ b/libc/startup/gpu/nvptx/CMakeLists.txt @@ -8,6 +8,7 @@ add_startup_object( -nogpulib # Do not include any GPU vendor libraries. -nostdinc -x cuda # Use the CUDA toolchain to emit the `_start` kernel. + -fgpu-rdc # Emit relocatable device code from CUDA. --offload-device-only --offload-arch=${LIBC_GPU_TARGET_ARCHITECTURE} NO_GPU_BUNDLE # Compile this file directly without special GPU handling. @@ -15,4 +16,8 @@ add_startup_object( get_fq_target_name(crt1 fq_name) # Ensure that clang uses the correct linker for this object type. 
-target_link_libraries(${fq_name} PUBLIC "--target=${LIBC_GPU_TARGET_TRIPLE}") +target_link_libraries(${fq_name} + PUBLIC + "-march=${LIBC_GPU_TARGET_ARCHITECTURE}" + "--target=${LIBC_GPU_TARGET_TRIPLE}" +) diff --git a/libc/startup/gpu/nvptx/start.cpp b/libc/startup/gpu/nvptx/start.cpp index 61569423c7b55..cf4077c3d9edd 100644 --- a/libc/startup/gpu/nvptx/start.cpp +++ b/libc/startup/gpu/nvptx/start.cpp @@ -6,10 +6,11 @@ // //===----------------------------------------------------------------------===// -extern "C" __attribute__((device)) int main(int argc, char **argv); +extern "C" __attribute__((device)) int main(int argc, char **argv, char **envp); // TODO: We shouldn't need to use the CUDA language to emit a kernel for NVPTX. extern "C" [[gnu::visibility("protected")]] __attribute__((global)) void -_start(int argc, char **argv, int *ret) { - __atomic_fetch_or(ret, main(argc, argv), __ATOMIC_RELAXED); +_start(int argc, char **argv, char **envp, int *ret, void *in, void *out, + void *buffer) { + __atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED); }