diff --git a/libc/config/linux/app.h b/libc/config/linux/app.h index 160d44e606143..ffd0e01f7b5a4 100644 --- a/libc/config/linux/app.h +++ b/libc/config/linux/app.h @@ -35,7 +35,9 @@ struct TLSImage { uintptr_t align; }; -#if defined(LIBC_TARGET_ARCH_IS_X86_64) || defined(LIBC_TARGET_ARCH_IS_AARCH64) +#if defined(LIBC_TARGET_ARCH_IS_X86_64) || \ + defined(LIBC_TARGET_ARCH_IS_AARCH64) || \ + defined(LIBC_TARGET_ARCH_IS_RISCV64) // At the language level, argc is an int. But we use uint64_t as the x86_64 // ABI specifies it as an 8 byte value. Likewise, in the ARM64 ABI, arguments // are usually passed in registers. x0 is a doubleword register, so this is diff --git a/libc/config/linux/riscv64/entrypoints.txt b/libc/config/linux/riscv64/entrypoints.txt index 625f7683bcc82..61789ddb320c7 100644 --- a/libc/config/linux/riscv64/entrypoints.txt +++ b/libc/config/linux/riscv64/entrypoints.txt @@ -365,6 +365,34 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.stdio.stdin libc.src.stdio.stdout + # stdlib.h entrypoints + libc.src.stdlib._Exit + libc.src.stdlib.atexit + libc.src.stdlib.exit + libc.src.stdlib.getenv + + # threads.h entrypoints + libc.src.threads.call_once + libc.src.threads.cnd_broadcast + libc.src.threads.cnd_destroy + libc.src.threads.cnd_init + libc.src.threads.cnd_signal + libc.src.threads.cnd_wait + libc.src.threads.mtx_destroy + libc.src.threads.mtx_init + libc.src.threads.mtx_lock + libc.src.threads.mtx_unlock + libc.src.threads.thrd_create + libc.src.threads.thrd_current + libc.src.threads.thrd_detach + libc.src.threads.thrd_equal + libc.src.threads.thrd_exit + libc.src.threads.thrd_join + libc.src.threads.tss_create + libc.src.threads.tss_delete + libc.src.threads.tss_get + libc.src.threads.tss_set + # time.h entrypoints libc.src.time.asctime libc.src.time.asctime_r @@ -377,6 +405,9 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.time.mktime libc.src.time.nanosleep libc.src.time.time + + # unistd.h entrypoints + libc.src.unistd.environ ) endif() diff --git 
a/libc/src/__support/threads/linux/thread.cpp b/libc/src/__support/threads/linux/thread.cpp index f158f16ec2dfd..c0bbd3d77e885 100644 --- a/libc/src/__support/threads/linux/thread.cpp +++ b/libc/src/__support/threads/linux/thread.cpp @@ -55,6 +55,16 @@ static constexpr unsigned CLONE_SYSCALL_FLAGS = // wake the joining thread. | CLONE_SETTLS; // Setup the thread pointer of the new thread. +#ifdef LIBC_TARGET_ARCH_IS_AARCH64 +#define CLONE_RESULT_REGISTER "x0" +#elif defined(LIBC_TARGET_ARCH_IS_RISCV64) +#define CLONE_RESULT_REGISTER "t0" +#elif defined(LIBC_TARGET_ARCH_IS_X86_64) +#define CLONE_RESULT_REGISTER "rax" +#else +#error "CLONE_RESULT_REGISTER not defined for your target architecture" +#endif + LIBC_INLINE ErrorOr alloc_stack(size_t size) { long mmap_result = __llvm_libc::syscall_impl(MMAP_SYSCALL_NUMBER, @@ -106,7 +116,8 @@ __attribute__((always_inline)) inline uintptr_t get_start_args_addr() { // on to the stack. So, we have to step past two 64-bit values to get // to the start args. + sizeof(uintptr_t) * 2; -#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) +#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) || \ + defined(LIBC_TARGET_ARCH_IS_RISCV64) // The frame pointer after cloning the new thread in the Thread::run method // is set to the stack pointer where start args are stored. So, we fetch // from there. @@ -190,16 +201,17 @@ int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack, // Also, we want the result of the syscall to be in a register as the child // thread gets a completely different stack after it is created. The stack // variables from this function will not be availalbe to the child thread. 
-#ifdef LIBC_TARGET_ARCH_IS_X86_64 - long register clone_result asm("rax"); +#if defined(LIBC_TARGET_ARCH_IS_X86_64) + long register clone_result asm(CLONE_RESULT_REGISTER); clone_result = __llvm_libc::syscall_impl( SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack, &attrib->tid, // The address where the child tid is written &clear_tid->val, // The futex where the child thread status is signalled tls.tp // The thread pointer value for the new thread. ); -#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) - long register clone_result asm("x0"); +#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) || \ + defined(LIBC_TARGET_ARCH_IS_RISCV64) + long register clone_result asm(CLONE_RESULT_REGISTER); clone_result = __llvm_libc::syscall_impl( SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack, &attrib->tid, // The address where the child tid is written @@ -220,6 +232,8 @@ int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack, #else asm volatile("mov x29, sp"); #endif +#elif defined(LIBC_TARGET_ARCH_IS_RISCV64) + asm volatile("mv fp, sp"); #endif start_thread(); } else if (clone_result < 0) { diff --git a/libc/src/__support/threads/thread.h b/libc/src/__support/threads/thread.h index 127182db588e7..9197d0f3873a9 100644 --- a/libc/src/__support/threads/thread.h +++ b/libc/src/__support/threads/thread.h @@ -37,7 +37,8 @@ union ThreadReturnValue { }; #if (defined(LIBC_TARGET_ARCH_IS_AARCH64) || \ - defined(LIBC_TARGET_ARCH_IS_X86_64)) + defined(LIBC_TARGET_ARCH_IS_X86_64) || \ + defined(LIBC_TARGET_ARCH_IS_RISCV64)) constexpr unsigned int STACK_ALIGNMENT = 16; #endif // TODO: Provide stack alignment requirements for other architectures. 
diff --git a/libc/startup/linux/riscv64/CMakeLists.txt b/libc/startup/linux/riscv64/CMakeLists.txt
new file mode 100644
index 0000000000000..14d6409af6cc1
--- /dev/null
+++ b/libc/startup/linux/riscv64/CMakeLists.txt
@@ -0,0 +1,16 @@
+add_startup_object(
+  crt1
+  SRC
+    start.cpp
+  DEPENDS
+    libc.config.linux.app_h
+    libc.include.sys_mman
+    libc.include.sys_syscall
+    libc.src.__support.OSUtil.osutil
+    libc.src.stdlib.exit
+    libc.src.stdlib.atexit
+    libc.src.string.memory_utils.memcpy_implementation
+  COMPILE_OPTIONS
+    -fno-omit-frame-pointer
+    -ffreestanding # To avoid compiler warnings about calling the main function.
+)
diff --git a/libc/startup/linux/riscv64/start.cpp b/libc/startup/linux/riscv64/start.cpp
new file mode 100644
index 0000000000000..c65407d06d64d
--- /dev/null
+++ b/libc/startup/linux/riscv64/start.cpp
@@ -0,0 +1,213 @@
+//===-- Implementation of crt for riscv64 ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "config/linux/app.h"
+#include "src/__support/OSUtil/syscall.h"
+#include "src/__support/threads/thread.h"
+#include "src/stdlib/atexit.h"
+#include "src/stdlib/exit.h"
+#include "src/string/memory_utils/memcpy_implementations.h"
+
+#include <linux/auxvec.h>
+#include <linux/elf.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+extern "C" int main(int, char **, char **);
+// The BFD linker requires a reference to __dso_handle to trigger creating
+// a symbol for it when -nostdlib is used.
+extern "C" void *__dso_handle = nullptr;
+
+namespace __llvm_libc {
+
+#ifdef SYS_mmap2
+static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
+#elif defined(SYS_mmap)
+static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
+#else
+#error "Target platform does not have SYS_mmap or SYS_mmap2 defined"
+#endif
+
+AppProperties app;
+
+static ThreadAttributes main_thread_attrib;
+
+void init_tls(TLSDescriptor &tls_descriptor) { // Maps and fills a TLS area.
+  if (app.tls.size == 0) {
+    tls_descriptor.size = 0; // Empty TLS image: nothing is mapped.
+    tls_descriptor.tp = 0;
+    return;
+  }
+
+  // riscv64 follows the variant 1 TLS layout:
+  const uintptr_t size_of_pointers = 2 * sizeof(uintptr_t);
+  uintptr_t padding = 0;
+  const uintptr_t ALIGNMENT_MASK = app.tls.align - 1;
+  uintptr_t diff = size_of_pointers & ALIGNMENT_MASK;
+  if (diff != 0)
+    padding += (ALIGNMENT_MASK - diff) + 1; // i.e. app.tls.align - diff.
+
+  uintptr_t alloc_size = size_of_pointers + padding + app.tls.size;
+
+  // We cannot call the mmap function here as the functions set errno on
+  // failure. Since errno is implemented via a thread local variable, we cannot
+  // use errno before TLS is set up.
+  long mmap_ret_val = __llvm_libc::syscall_impl(
+      MMAP_SYSCALL_NUMBER, nullptr, alloc_size, PROT_READ | PROT_WRITE,
+      MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+  // We cannot check the return value with MAP_FAILED as that is the return
+  // of the mmap function and not the mmap syscall.
+  if (mmap_ret_val < 0 && static_cast<uintptr_t>(mmap_ret_val) > -app.pageSize)
+    __llvm_libc::syscall_impl(SYS_exit, 1);
+  uintptr_t thread_ptr = uintptr_t(reinterpret_cast<uintptr_t *>(mmap_ret_val));
+  uintptr_t tls_addr = thread_ptr + size_of_pointers + padding;
+  __llvm_libc::inline_memcpy(reinterpret_cast<char *>(tls_addr),
+                             reinterpret_cast<const char *>(app.tls.address),
+                             app.tls.init_size);
+  tls_descriptor.size = alloc_size;
+  tls_descriptor.addr = thread_ptr;
+  tls_descriptor.tp = tls_addr;
+}
+
+void cleanup_tls(uintptr_t addr, uintptr_t size) { // Unmaps a TLS area.
+  if (size == 0)
+    return;
+  __llvm_libc::syscall_impl(SYS_munmap, addr, size);
+}
+
+static void set_thread_ptr(uintptr_t val) { // Loads val into register tp.
+  LIBC_INLINE_ASM("ld tp, %0\n\t" : : "m"(val));
+}
+
+using InitCallback = void(int, char **, char **);
+using FiniCallback = void(void);
+
+extern "C" {
+// These arrays are present in the .init_array and .fini_array sections.
+// The symbols are inserted by linker when it sees references to them.
+extern uintptr_t __preinit_array_start[];
+extern uintptr_t __preinit_array_end[];
+extern uintptr_t __init_array_start[];
+extern uintptr_t __init_array_end[];
+extern uintptr_t __fini_array_start[];
+extern uintptr_t __fini_array_end[];
+}
+
+static void call_init_array_callbacks(int argc, char **argv, char **env) {
+  size_t preinit_array_size = __preinit_array_end - __preinit_array_start;
+  for (size_t i = 0; i < preinit_array_size; ++i) // Preinit entries run first.
+    reinterpret_cast<InitCallback *>(__preinit_array_start[i])(argc, argv, env);
+  size_t init_array_size = __init_array_end - __init_array_start;
+  for (size_t i = 0; i < init_array_size; ++i) // Then the init entries.
+    reinterpret_cast<InitCallback *>(__init_array_start[i])(argc, argv, env);
+}
+
+static void call_fini_array_callbacks() {
+  size_t fini_array_size = __fini_array_end - __fini_array_start;
+  for (size_t i = fini_array_size; i > 0; --i) // Last entry runs first.
+    reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();
+}
+
+} // namespace __llvm_libc
+
+using __llvm_libc::app;
+
+// TODO: Would be nice to use the aux entry structure from elf.h when available.
+struct AuxEntry { // One (type, value) entry of the aux vector.
+  uint64_t type;
+  uint64_t value;
+};
+
+__attribute__((noinline)) static void do_start() { // Startup work for _start.
+  auto tid = __llvm_libc::syscall_impl(SYS_gettid);
+  if (tid <= 0)
+    __llvm_libc::syscall_impl(SYS_exit, 1);
+  __llvm_libc::main_thread_attrib.tid = tid;
+
+  // After the argv array, is an 8-byte long NULL value before the array of env
+  // values. The end of the env values is marked by another 8-byte long NULL
+  // value. We step over it (the "+ 1" below) to get to the env values.
+  uint64_t *env_ptr = app.args->argv + app.args->argc + 1;
+  uint64_t *env_end_marker = env_ptr;
+  app.envPtr = env_ptr;
+  while (*env_end_marker)
+    ++env_end_marker;
+
+  // Initialize the POSIX global declared in unistd.h
+  environ = reinterpret_cast<char **>(env_ptr);
+
+  // After the env array, is the aux-vector. The end of the aux-vector is
+  // denoted by an AT_NULL entry.
+  Elf64_Phdr *programHdrTable = nullptr;
+  uintptr_t programHdrCount = 0; // Stays 0 if the aux vector has no AT_PHNUM.
+  for (AuxEntry *aux_entry = reinterpret_cast<AuxEntry *>(env_end_marker + 1);
+       aux_entry->type != AT_NULL; ++aux_entry) {
+    switch (aux_entry->type) {
+    case AT_PHDR:
+      programHdrTable = reinterpret_cast<Elf64_Phdr *>(aux_entry->value);
+      break;
+    case AT_PHNUM:
+      programHdrCount = aux_entry->value;
+      break;
+    case AT_PAGESZ:
+      app.pageSize = aux_entry->value;
+      break;
+    default:
+      break; // TODO: Read other useful entries from the aux vector.
+    }
+  }
+
+  app.tls.size = 0;
+  for (uintptr_t i = 0; i < programHdrCount; ++i) {
+    Elf64_Phdr *phdr = programHdrTable + i;
+    if (phdr->p_type != PT_TLS)
+      continue;
+    // TODO: p_vaddr value has to be adjusted for static-pie executables.
+    app.tls.address = phdr->p_vaddr;
+    app.tls.size = phdr->p_memsz;
+    app.tls.init_size = phdr->p_filesz;
+    app.tls.align = phdr->p_align;
+  }
+
+  __llvm_libc::TLSDescriptor tls;
+  __llvm_libc::init_tls(tls);
+  if (tls.size != 0)
+    __llvm_libc::set_thread_ptr(tls.tp);
+
+  __llvm_libc::self.attrib = &__llvm_libc::main_thread_attrib;
+  __llvm_libc::main_thread_attrib.atexit_callback_mgr =
+      __llvm_libc::internal::get_thread_atexit_callback_mgr();
+
+  // We want the fini array callbacks to be run after other atexit
+  // callbacks are run. So, we register them before running the init
+  // array callbacks as they can potentially register their own atexit
+  // callbacks.
+  __llvm_libc::atexit(&__llvm_libc::call_fini_array_callbacks);
+
+  __llvm_libc::call_init_array_callbacks(
+      app.args->argc, reinterpret_cast<char **>(app.args->argv),
+      reinterpret_cast<char **>(env_ptr));
+
+  int retval = main(app.args->argc, reinterpret_cast<char **>(app.args->argv),
+                    reinterpret_cast<char **>(env_ptr));
+
+  // TODO: TLS cleanup should be done after all other atexit callbacks
+  // are run. So, register a cleanup callback for it with atexit before
+  // everything else.
+  __llvm_libc::cleanup_tls(tls.addr, tls.size);
+  __llvm_libc::exit(retval);
+}
+
+extern "C" void _start() {
+  // Fetch the args using the frame pointer.
+  app.args = reinterpret_cast<__llvm_libc::Args *>(
+      reinterpret_cast<uintptr_t *>(__builtin_frame_address(0)));
+  do_start();
+}