From 42a7a45c845de377b9b714af39a449fdc49eb768 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Fri, 21 Jun 2024 19:10:40 -0500 Subject: [PATCH] [libc] Implement (v|f)printf on the GPU Summary: This patch implements the `printf` family of functions on the GPU using the new variadic support. This patch adapts the old handling in the `rpc_fprintf` placeholder, but adds an extra RPC call to get the size of the buffer to copy. This prevents the GPU from needing to parse the string. While it's theoretically possible for the pass to know the size of the struct, it's prohibitively difficult to do while maintaining ABI compatibility with NVIDIA's varargs. Depends on https://github.com/llvm/llvm-project/pull/96015. --- .../ClangLinkerWrapper.cpp | 1 + libc/config/gpu/entrypoints.txt | 19 ++--- libc/src/__support/arg_list.h | 3 +- libc/src/gpu/rpc_fprintf.cpp | 5 +- libc/src/stdio/CMakeLists.txt | 24 +----- libc/src/stdio/generic/CMakeLists.txt | 25 +++++++ libc/src/stdio/{ => generic}/fprintf.cpp | 0 libc/src/stdio/{ => generic}/vfprintf.cpp | 0 libc/src/stdio/gpu/CMakeLists.txt | 48 ++++++++++++ libc/src/stdio/gpu/fprintf.cpp | 32 ++++++++ libc/src/stdio/gpu/printf.cpp | 30 ++++++++ libc/src/stdio/gpu/vfprintf.cpp | 29 ++++++++ libc/src/stdio/gpu/vfprintf_utils.h | 73 +++++++++++++++++++ libc/src/stdio/gpu/vprintf.cpp | 28 +++++++ .../integration/src/stdio/gpu/CMakeLists.txt | 2 +- .../test/integration/src/stdio/gpu/printf.cpp | 43 ++++------- libc/utils/gpu/server/rpc_server.cpp | 24 +++++- llvm/lib/Transforms/IPO/ExpandVariadics.cpp | 8 +- 18 files changed, 326 insertions(+), 68 deletions(-) rename libc/src/stdio/{ => generic}/fprintf.cpp (100%) rename libc/src/stdio/{ => generic}/vfprintf.cpp (100%) create mode 100644 libc/src/stdio/gpu/fprintf.cpp create mode 100644 libc/src/stdio/gpu/printf.cpp create mode 100644 libc/src/stdio/gpu/vfprintf.cpp create mode 100644 libc/src/stdio/gpu/vfprintf_utils.h create mode 100644 libc/src/stdio/gpu/vprintf.cpp diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index cdfe8cfbd9379..03fd23ae39c29 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -1671,6 +1671,7 @@ int main(int Argc, char **Argv) { NewArgv.push_back(Arg->getValue()); for (const opt::Arg *Arg : Args.filtered(OPT_offload_opt_eq_minus)) NewArgv.push_back(Args.MakeArgString(StringRef("-") + Arg->getValue())); + llvm::errs() << "asdfasdf\n"; cl::ParseCommandLineOptions(NewArgv.size(), &NewArgv[0]); Verbose = Args.hasArg(OPT_verbose); diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt index 2217a696fc5d1..de1ca6bfd151f 100644 --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -1,13 +1,3 @@ -if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) - set(extra_entrypoints - # stdio.h entrypoints - libc.src.stdio.sprintf - libc.src.stdio.snprintf - libc.src.stdio.vsprintf - libc.src.stdio.vsnprintf - ) -endif() - set(TARGET_LIBC_ENTRYPOINTS # assert.h entrypoints libc.src.assert.__assert_fail @@ -185,7 +175,14 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.errno.errno # stdio.h entrypoints - ${extra_entrypoints} + libc.src.stdio.printf + libc.src.stdio.vprintf + libc.src.stdio.fprintf + libc.src.stdio.vfprintf + libc.src.stdio.sprintf + libc.src.stdio.snprintf + libc.src.stdio.vsprintf + libc.src.stdio.vsnprintf libc.src.stdio.feof libc.src.stdio.ferror libc.src.stdio.fseek diff --git a/libc/src/__support/arg_list.h b/libc/src/__support/arg_list.h index 0965e12afd562..3a4e5ad0fab3c 100644 --- a/libc/src/__support/arg_list.h +++ b/libc/src/__support/arg_list.h @@ -54,7 +54,8 @@ class MockArgList { } template LIBC_INLINE T next_var() { - ++arg_counter; + arg_counter = + ((arg_counter + alignof(T) - 1) / alignof(T)) * alignof(T) + sizeof(T); return T(arg_counter); } diff --git a/libc/src/gpu/rpc_fprintf.cpp b/libc/src/gpu/rpc_fprintf.cpp index 7b0e60b59baf3..659144d133004 100644 --- a/libc/src/gpu/rpc_fprintf.cpp +++ b/libc/src/gpu/rpc_fprintf.cpp @@ -29,6 +29,9 @@ int fprintf_impl(::FILE *__restrict file, const char *__restrict format, } port.send_n(format, format_size); + port.recv([&](rpc::Buffer *buffer) { + args_size = static_cast(buffer->data[0]); + }); port.send_n(args, args_size); uint32_t ret = 0; @@ -50,7 +53,7 @@ int fprintf_impl(::FILE *__restrict file, const char *__restrict format, return ret; } -// TODO: This is a stand-in function that uses a struct pointer and size in +// TODO: Delete this and port OpenMP to use `printf`. // place of varargs. Once varargs support is added we will use that to // implement the real version. LLVM_LIBC_FUNCTION(int, rpc_fprintf, diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt index a659d9e847a9e..3c536a287b2c4 100644 --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -159,17 +159,6 @@ add_entrypoint_object( libc.src.stdio.printf_core.writer ) -add_entrypoint_object( - fprintf - SRCS - fprintf.cpp - HDRS - fprintf.h - DEPENDS - libc.src.__support.arg_list - libc.src.stdio.printf_core.vfprintf_internal -) - add_entrypoint_object( vsprintf SRCS @@ -192,17 +181,6 @@ add_entrypoint_object( libc.src.stdio.printf_core.writer ) -add_entrypoint_object( - vfprintf - SRCS - vfprintf.cpp - HDRS - vfprintf.h - DEPENDS - libc.src.__support.arg_list - libc.src.stdio.printf_core.vfprintf_internal -) - add_stdio_entrypoint_object( fileno SRCS @@ -261,6 +239,7 @@ add_stdio_entrypoint_object(fputc) add_stdio_entrypoint_object(putc) add_stdio_entrypoint_object(putchar) add_stdio_entrypoint_object(printf) +add_stdio_entrypoint_object(fprintf) add_stdio_entrypoint_object(fgetc) add_stdio_entrypoint_object(fgetc_unlocked) add_stdio_entrypoint_object(getc) @@ -273,3 +252,4 @@ add_stdio_entrypoint_object(stdin) add_stdio_entrypoint_object(stdout) add_stdio_entrypoint_object(stderr) add_stdio_entrypoint_object(vprintf) +add_stdio_entrypoint_object(vfprintf) diff --git a/libc/src/stdio/generic/CMakeLists.txt b/libc/src/stdio/generic/CMakeLists.txt index 9cd4cfdae17f4..e0a1c577efcb6 100644 --- a/libc/src/stdio/generic/CMakeLists.txt +++ b/libc/src/stdio/generic/CMakeLists.txt @@ -396,6 +396,31 @@ add_entrypoint_object( ${printf_deps} ) +add_entrypoint_object( + fprintf + SRCS + fprintf.cpp + HDRS + ../fprintf.h + DEPENDS + libc.src.__support.arg_list + libc.src.stdio.printf_core.vfprintf_internal + ${printf_deps} +) + +add_entrypoint_object( + vfprintf + SRCS + vfprintf.cpp + HDRS + ../vfprintf.h + DEPENDS + libc.src.__support.arg_list + libc.src.stdio.printf_core.vfprintf_internal + ${printf_deps} +) + + add_entrypoint_object( fgets SRCS diff --git a/libc/src/stdio/fprintf.cpp b/libc/src/stdio/generic/fprintf.cpp similarity index 100% rename from libc/src/stdio/fprintf.cpp rename to libc/src/stdio/generic/fprintf.cpp diff --git a/libc/src/stdio/vfprintf.cpp b/libc/src/stdio/generic/vfprintf.cpp similarity index 100% rename from libc/src/stdio/vfprintf.cpp rename to libc/src/stdio/generic/vfprintf.cpp diff --git a/libc/src/stdio/gpu/CMakeLists.txt b/libc/src/stdio/gpu/CMakeLists.txt index 1b1e2a903cc0b..280e0d3f6b00d 100644 --- a/libc/src/stdio/gpu/CMakeLists.txt +++ b/libc/src/stdio/gpu/CMakeLists.txt @@ -10,6 +10,14 @@ add_header_library( .stderr ) +add_header_library( + vfprintf_utils + HDRS + vfprintf_utils.h + DEPENDS + .gpu_file +) + add_entrypoint_object( feof SRCS @@ -262,6 +270,46 @@ add_entrypoint_object( .gpu_file ) +add_entrypoint_object( + printf + SRCS + printf.cpp + HDRS + ../printf.h + DEPENDS + .vfprintf_utils +) + +add_entrypoint_object( + vprintf + SRCS + vprintf.cpp + HDRS + ../vprintf.h + DEPENDS + .vfprintf_utils +) + +add_entrypoint_object( + fprintf + SRCS + fprintf.cpp + HDRS + ../fprintf.h + DEPENDS + .vfprintf_utils +) + +add_entrypoint_object( + vfprintf + SRCS + vfprintf.cpp + HDRS + ../vfprintf.h + DEPENDS + .vfprintf_utils +) + add_entrypoint_object( stdin SRCS diff --git a/libc/src/stdio/gpu/fprintf.cpp b/libc/src/stdio/gpu/fprintf.cpp new file mode 100644 index 0000000000000..adae0203180a4 --- /dev/null +++ b/libc/src/stdio/gpu/fprintf.cpp @@ -0,0 +1,32 @@ +//===-- GPU Implementation of fprintf -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/fprintf.h" + +#include "src/__support/CPP/string_view.h" +#include "src/__support/arg_list.h" +#include "src/errno/libc_errno.h" +#include "src/stdio/gpu/vfprintf_utils.h" + +#include + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(int, fprintf, + (::FILE *__restrict stream, const char *__restrict format, + ...)) { + va_list vlist; + va_start(vlist, format); + cpp::string_view str_view(format); + int ret_val = + file::vfprintf_internal(stream, format, str_view.size() + 1, vlist); + va_end(vlist); + return ret_val; +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdio/gpu/printf.cpp b/libc/src/stdio/gpu/printf.cpp new file mode 100644 index 0000000000000..44905f24bad7d --- /dev/null +++ b/libc/src/stdio/gpu/printf.cpp @@ -0,0 +1,30 @@ +//===-- GPU Implementation of printf --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/printf.h" + +#include "src/__support/CPP/string_view.h" +#include "src/__support/arg_list.h" +#include "src/errno/libc_errno.h" +#include "src/stdio/gpu/vfprintf_utils.h" + +#include + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(int, printf, (const char *__restrict format, ...)) { + va_list vlist; + va_start(vlist, format); + cpp::string_view str_view(format); + int ret_val = + file::vfprintf_internal(stdout, format, str_view.size() + 1, vlist); + va_end(vlist); + return ret_val; +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdio/gpu/vfprintf.cpp b/libc/src/stdio/gpu/vfprintf.cpp new file mode 100644 index 0000000000000..2ec65d9afcb97 --- /dev/null +++ b/libc/src/stdio/gpu/vfprintf.cpp @@ -0,0 +1,29 @@ +//===-- GPU Implementation of vfprintf ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/vfprintf.h" + +#include "src/__support/CPP/string_view.h" +#include "src/__support/arg_list.h" +#include "src/errno/libc_errno.h" +#include "src/stdio/gpu/vfprintf_utils.h" + +#include + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(int, vfprintf, + (::FILE *__restrict stream, const char *__restrict format, + va_list vlist)) { + cpp::string_view str_view(format); + int ret_val = + file::vfprintf_internal(stream, format, str_view.size() + 1, vlist); + return ret_val; +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdio/gpu/vfprintf_utils.h b/libc/src/stdio/gpu/vfprintf_utils.h new file mode 100644 index 0000000000000..e1a3b97b35673 --- /dev/null +++ b/libc/src/stdio/gpu/vfprintf_utils.h @@ -0,0 +1,73 @@ +//===--- GPU helper functions for printf using RPC ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/RPC/rpc_client.h" +#include "src/__support/arg_list.h" +#include "src/stdio/gpu/file.h" +#include "src/string/string_utils.h" + +#include + +namespace LIBC_NAMESPACE { +namespace file { + +template +LIBC_INLINE int vfprintf_impl(::FILE *__restrict file, + const char *__restrict format, size_t format_size, + va_list vlist) { + uint64_t mask = gpu::get_lane_mask(); + rpc::Client::Port port = rpc::client.open(); + + if constexpr (opcode == RPC_PRINTF_TO_STREAM) { + port.send([&](rpc::Buffer *buffer) { + buffer->data[0] = reinterpret_cast(file); + }); + } + + size_t args_size = 0; + port.send_n(format, format_size); + port.recv([&](rpc::Buffer *buffer) { + args_size = static_cast(buffer->data[0]); + }); + port.send_n(vlist, args_size); + + uint32_t ret = 0; + for (;;) { + const char *str = nullptr; + port.recv([&](rpc::Buffer *buffer) { + ret = static_cast(buffer->data[0]); + str = reinterpret_cast(buffer->data[1]); + }); + // If any lanes have a string argument it needs to be copied back. + if (!gpu::ballot(mask, str)) + break; + + uint64_t size = str ? internal::string_length(str) + 1 : 0; + port.send_n(str, size); + } + + port.close(); + return ret; +} + +LIBC_INLINE int vfprintf_internal(::FILE *__restrict stream, + const char *__restrict format, + size_t format_size, va_list vlist) { + if (stream == stdout) + return vfprintf_impl(stream, format, format_size, + vlist); + else if (stream == stderr) + return vfprintf_impl(stream, format, format_size, + vlist); + else + return vfprintf_impl(stream, format, format_size, + vlist); +} + +} // namespace file +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdio/gpu/vprintf.cpp b/libc/src/stdio/gpu/vprintf.cpp new file mode 100644 index 0000000000000..ee5b89a1a33d1 --- /dev/null +++ b/libc/src/stdio/gpu/vprintf.cpp @@ -0,0 +1,28 @@ +//===-- GPU Implementation of vprintf -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/vprintf.h" + +#include "src/__support/CPP/string_view.h" +#include "src/__support/arg_list.h" +#include "src/errno/libc_errno.h" +#include "src/stdio/gpu/vfprintf_utils.h" + +#include + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(int, vprintf, + (const char *__restrict format, va_list vlist)) { + cpp::string_view str_view(format); + int ret_val = + file::vfprintf_internal(stdout, format, str_view.size() + 1, vlist); + return ret_val; +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/test/integration/src/stdio/gpu/CMakeLists.txt b/libc/test/integration/src/stdio/gpu/CMakeLists.txt index 6327c45e1ea5a..04fbd4706c556 100644 --- a/libc/test/integration/src/stdio/gpu/CMakeLists.txt +++ b/libc/test/integration/src/stdio/gpu/CMakeLists.txt @@ -13,7 +13,7 @@ add_integration_test( SRCS printf.cpp DEPENDS - libc.src.gpu.rpc_fprintf + libc.src.stdio.fprintf libc.src.stdio.fopen LOADER_ARGS --threads 32 diff --git a/libc/test/integration/src/stdio/gpu/printf.cpp b/libc/test/integration/src/stdio/gpu/printf.cpp index 97ad4ace1dcac..766c4f9439115 100644 --- a/libc/test/integration/src/stdio/gpu/printf.cpp +++ b/libc/test/integration/src/stdio/gpu/printf.cpp @@ -9,8 +9,8 @@ #include "test/IntegrationTest/test.h" #include "src/__support/GPU/utils.h" -#include "src/gpu/rpc_fprintf.h" #include "src/stdio/fopen.h" +#include "src/stdio/fprintf.h" using namespace LIBC_NAMESPACE; @@ -20,68 +20,51 @@ TEST_MAIN(int argc, char **argv, char **envp) { ASSERT_TRUE(file && "failed to open file"); // Check basic printing. int written = 0; - written = LIBC_NAMESPACE::rpc_fprintf(file, "A simple string\n", nullptr, 0); + written = LIBC_NAMESPACE::fprintf(file, "A simple string\n"); ASSERT_EQ(written, 16); const char *str = "A simple string\n"; - written = LIBC_NAMESPACE::rpc_fprintf(file, "%s", &str, sizeof(void *)); + written = LIBC_NAMESPACE::fprintf(file, "%s", str); ASSERT_EQ(written, 16); // Check printing a different value with each thread. uint64_t thread_id = gpu::get_thread_id(); - written = LIBC_NAMESPACE::rpc_fprintf(file, "%8ld\n", &thread_id, - sizeof(thread_id)); + written = LIBC_NAMESPACE::fprintf(file, "%8ld\n", thread_id); ASSERT_EQ(written, 9); - struct { - uint32_t x = 1; - char c = 'c'; - double f = 1.0; - } args1; - written = - LIBC_NAMESPACE::rpc_fprintf(file, "%d%c%.1f\n", &args1, sizeof(args1)); + written = LIBC_NAMESPACE::fprintf(file, "%d%c%.1f\n", 1, 'c', 1.0); ASSERT_EQ(written, 6); - struct { - uint32_t x = 1; - const char *str = "A simple string\n"; - } args2; - written = - LIBC_NAMESPACE::rpc_fprintf(file, "%032b%s\n", &args2, sizeof(args2)); + written = LIBC_NAMESPACE::fprintf(file, "%032b%s\n", 1, "A simple string\n"); ASSERT_EQ(written, 49); // Check that the server correctly handles divergent numbers of arguments. const char *format = gpu::get_thread_id() % 2 ? "%s" : "%20ld\n"; - written = LIBC_NAMESPACE::rpc_fprintf(file, format, &str, sizeof(void *)); + written = LIBC_NAMESPACE::fprintf(file, format, str); ASSERT_EQ(written, gpu::get_thread_id() % 2 ? 16 : 21); format = gpu::get_thread_id() % 2 ? "%s" : str; - written = LIBC_NAMESPACE::rpc_fprintf(file, format, &str, sizeof(void *)); + written = LIBC_NAMESPACE::fprintf(file, format, str); ASSERT_EQ(written, 16); // Check that we handle null arguments correctly. struct { void *null = nullptr; } args3; - written = LIBC_NAMESPACE::rpc_fprintf(file, "%p", &args3, sizeof(args3)); + written = LIBC_NAMESPACE::fprintf(file, "%p", nullptr); ASSERT_EQ(written, 9); #ifndef LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS - written = LIBC_NAMESPACE::rpc_fprintf(file, "%s", &args3, sizeof(args3)); + written = LIBC_NAMESPACE::fprintf(file, "%s", nullptr); ASSERT_EQ(written, 6); #endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS // Check for extremely abused variable width arguments - struct { - uint32_t x = 1; - uint32_t y = 2; - double f = 1.0; - } args4; - written = LIBC_NAMESPACE::rpc_fprintf(file, "%**d", &args4, sizeof(args4)); + written = LIBC_NAMESPACE::fprintf(file, "%**d", 1, 2, 1.0); ASSERT_EQ(written, 4); - written = LIBC_NAMESPACE::rpc_fprintf(file, "%**d%6d", &args4, sizeof(args4)); + written = LIBC_NAMESPACE::fprintf(file, "%**d%6d", 1, 2, 1.0); ASSERT_EQ(written, 10); - written = LIBC_NAMESPACE::rpc_fprintf(file, "%**.**f", &args4, sizeof(args4)); + written = LIBC_NAMESPACE::fprintf(file, "%**.**f", 1, 2, 1.0); ASSERT_EQ(written, 7); return 0; diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp index 095f3fa13ffad..8813f67093010 100644 --- a/libc/utils/gpu/server/rpc_server.cpp +++ b/libc/utils/gpu/server/rpc_server.cpp @@ -44,7 +44,7 @@ template void handle_printf(rpc::Server::Port &port) { // Get the appropriate output stream to use. if (port.get_opcode() == RPC_PRINTF_TO_STREAM) port.recv([&](rpc::Buffer *buffer, uint32_t id) { - files[id] = reinterpret_cast(buffer->data[0]); + files[id] = file::to_stream(buffer->data[0]); }); else if (port.get_opcode() == RPC_PRINTF_TO_STDOUT) std::fill(files, files + lane_size, stdout); @@ -60,6 +60,28 @@ template void handle_printf(rpc::Server::Port &port) { // Recieve the format string and arguments from the client. port.recv_n(format, format_sizes, [&](uint64_t size) { return new char[size]; }); + + // Parse the format string to get the expected size of the buffer. + for (uint32_t lane = 0; lane < lane_size; ++lane) { + if (!format[lane]) + continue; + + WriteBuffer wb(nullptr, 0); + Writer writer(&wb); + + internal::MockArgList printf_args; + Parser parser( + reinterpret_cast(format[lane]), printf_args); + + for (FormatSection cur_section = parser.get_next_section(); + !cur_section.raw_string.empty(); + cur_section = parser.get_next_section()) + ; + args_sizes[lane] = printf_args.read_count(); + } + port.send([&](rpc::Buffer *buffer, uint32_t id) { + buffer->data[0] = args_sizes[id]; + }); port.recv_n(args, args_sizes, [&](uint64_t size) { return new char[size]; }); // Identify any arguments that are actually pointers to strings on the client. diff --git a/llvm/lib/Transforms/IPO/ExpandVariadics.cpp b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp index d340bc041ccda..df70d7427e52c 100644 --- a/llvm/lib/Transforms/IPO/ExpandVariadics.cpp +++ b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp @@ -934,7 +934,13 @@ struct Amdgpu final : public VariadicABIInfo { } VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override { - return {Align(4), false}; + // NVPTX expects natural alignment in all cases. The variadic call ABI will + // handle promoting types to their appropriate size and alignment. + const unsigned MinAlign = 1; + Align A = DL.getABITypeAlign(Parameter); + if (A < MinAlign) + A = Align(MinAlign); + return {A, false}; } };