diff --git a/arch/lkl/Kconfig b/arch/lkl/Kconfig
index 28019d31a2b212..02b053dfc5d184 100644
--- a/arch/lkl/Kconfig
+++ b/arch/lkl/Kconfig
@@ -35,6 +35,7 @@ config LKL
 	select IPV6_ADVANCED_ROUTER
 	select ARCH_NO_COHERENT_DMA_MMAP
 	select HAVE_MEMBLOCK
+	select HAVE_COPY_THREAD_TLS
 	select NO_BOOTMEM
 
 config OUTPUT_FORMAT
diff --git a/arch/lkl/include/asm/sched.h b/arch/lkl/include/asm/sched.h
index da7376363e6f97..be2bf1bc74baeb 100644
--- a/arch/lkl/include/asm/sched.h
+++ b/arch/lkl/include/asm/sched.h
@@ -25,4 +25,6 @@ struct task_struct* lkl_get_current_task_struct(void);
 
 int host_task_stub(void *unused);
 
+extern struct lkl_tls_key *task_key;
+
 #endif /* _ASM_LKL_SCHED_H */
diff --git a/arch/lkl/include/asm/syscalls.h b/arch/lkl/include/asm/syscalls.h
index 333e9393ec3d3d..9ff23054a37ec4 100644
--- a/arch/lkl/include/asm/syscalls.h
+++ b/arch/lkl/include/asm/syscalls.h
@@ -18,7 +18,6 @@ void wakeup_idle_host_task(void);
 
 #define sys_mmap sys_mmap_pgoff
 #define sys_mmap2 sys_mmap_pgoff
-#define sys_clone sys_ni_syscall
 #define sys_vfork sys_ni_syscall
 #define sys_rt_sigreturn sys_ni_syscall
 
diff --git a/arch/lkl/include/asm/thread_info.h b/arch/lkl/include/asm/thread_info.h
index 79a6b781d2c07a..1022ebd55815e7 100644
--- a/arch/lkl/include/asm/thread_info.h
+++ b/arch/lkl/include/asm/thread_info.h
@@ -23,6 +23,12 @@ struct thread_info {
 	lkl_thread_t tid;
 	struct task_struct *prev_sched;
 	unsigned long stackend;
+	/* The return address from the currently executing syscall. Invalid when
+	 * the thread is not executing a syscall. */
+	void *syscall_ret;
+	/* The task for any child that was created during syscall execution. Only
+	 * valid on return from a clone-family syscall. */
+	struct task_struct *cloned_child;
 };
 
 #define INIT_THREAD_INFO(tsk)		\
@@ -57,6 +63,7 @@ void threads_cleanup(void);
 #define TIF_SCHED_JB		7
 #define TIF_HOST_THREAD		8
 #define TIF_NO_TERMINATION	9	// Do not terminate LKL on exit
+#define TIF_CLONED_HOST_THREAD	10	// This is a host thread created via a clone-family call.
 
 #define __HAVE_THREAD_FUNCTIONS
 
diff --git a/arch/lkl/include/uapi/asm/host_ops.h b/arch/lkl/include/uapi/asm/host_ops.h
index c3cc05e11b0dcc..1b6360958cafff 100644
--- a/arch/lkl/include/uapi/asm/host_ops.h
+++ b/arch/lkl/include/uapi/asm/host_ops.h
@@ -56,6 +56,13 @@ struct ucontext;
 *
 * @thread_create - create a new thread and run f(arg) in its context; returns a
 * thread handle or 0 if the thread could not be created
+ * @thread_create_host - create a new thread as the result of a fork-like call
+ * and initialise its register set to the provided program counter, stack
+ * pointer, and TLS area; returns a thread handle or 0 if the thread could not
+ * be created
+ * @thread_destroy_host - destroy the state associated with a host thread that
+ * has exited via an exit system call. The task_key argument is the TLS
+ * variable containing the task; its destructor must not be run.
 * @thread_detach - on POSIX systems, free up resources held by
 * pthreads. Noop on Win32.
 * @thread_exit - terminates the current thread
@@ -119,6 +126,11 @@ struct lkl_host_operations {
 	void (*mutex_unlock)(struct lkl_mutex *mutex);
 
 	lkl_thread_t (*thread_create)(void (*f)(void *), void *arg);
+	lkl_thread_t (*thread_create_host)(void* pc, void* sp, void* tls,
+					   struct lkl_tls_key* task_key, void* task_value);
+	void (*thread_destroy_host)(lkl_thread_t tid,
+				    struct lkl_tls_key* task_key);
+
 	void (*thread_detach)(void);
 	void (*thread_exit)(void);
 	int (*thread_join)(lkl_thread_t tid);
diff --git a/arch/lkl/include/uapi/asm/unistd.h b/arch/lkl/include/uapi/asm/unistd.h
index b30064b1e1857d..c3cf82d4995ab7 100644
--- a/arch/lkl/include/uapi/asm/unistd.h
+++ b/arch/lkl/include/uapi/asm/unistd.h
@@ -5,6 +5,8 @@
 #define __ARCH_WANT_NEW_STAT
 #define __ARCH_WANT_SET_GET_RLIMIT
 #define __ARCH_WANT_TIME32_SYSCALLS
+#define __ARCH_WANT_SYS_CLONE
+
 #include <asm-generic/unistd.h>
 
diff --git a/arch/lkl/kernel/syscalls.c b/arch/lkl/kernel/syscalls.c
index 0c5038c94dc535..01b0a9b88a1808 100644
--- a/arch/lkl/kernel/syscalls.c
+++ b/arch/lkl/kernel/syscalls.c
@@ -116,7 +116,7 @@ static void del_host_task(void *arg)
 	lkl_ops->jmp_buf_set(&ti->sched_jb, exit_task);
 }
 
-static struct lkl_tls_key *task_key;
+struct lkl_tls_key *task_key;
 
 /* Use this to record an ongoing LKL shutdown */
 _Atomic(bool) lkl_shutdown = false;
@@ -131,6 +131,7 @@ struct task_struct* lkl_get_current_task_struct(void)
 long lkl_syscall(long no, long *params)
 {
 	struct task_struct *task = host0;
+	struct thread_info *ti;
 	long ret;
 
 	LKL_TRACE(
@@ -172,6 +173,14 @@ long lkl_syscall(long no, long *params)
 		}
 	}
 
+	ti = task_thread_info(task);
+	/*
+	 * Store the return address so that it can be used in clone and similar
+	 * calls. In conventional arch ports, this would happen for free because
+	 * the system call would capture the register state of the caller.
+	 */
+	ti->syscall_ret = __builtin_return_address(0);
+
 	LKL_TRACE("switching to host task (no=%li task=%s current=%s)\n", no,
 		  task->comm, current->comm);
 
@@ -185,6 +194,10 @@ long lkl_syscall(long no, long *params)
 	LKL_TRACE("returned from run_syscall() (no=%li task=%s current=%s)\n",
 		  no, task->comm, current->comm);
 
+	/*
+	 * Zero the return address so that nothing accidentally sees a stale value.
+	 */
+	ti->syscall_ret = 0;
 	task_work_run();
 
 	/*
@@ -201,6 +214,31 @@ long lkl_syscall(long no, long *params)
 	}
 
 out:
+	/*
+	 * If we have created a new host task, make sure that it isn't on the
+	 * scheduler queue when we return. LKL expects that the only tasks driven
+	 * by the Linux scheduler are kernel threads. If we are releasing the CPU
+	 * lock entirely and there are runnable tasks, `lkl_cpu_put` may run the
+	 * scheduler and not release the lock. The scheduler hands the CPU lock to
+	 * the next running thread and `lkl_cpu_put` expects this to be the idle
+	 * host task (which then releases the lock). If host tasks are scheduled,
+	 * they will be left running (and owning the CPU lock) and `lkl_cpu_put`
+	 * will return without anything having released the lock. LKL will then
+	 * deadlock on the next system call.
+	 */
+	if (ti->cloned_child)
+	{
+		struct task_struct *child = ti->cloned_child;
+		ti->cloned_child = NULL;
+		/*
+		 * We can't change the scheduler state of a task that isn't running, so
+		 * switch to the task and then mark it as uninterruptible.
+		 */
+		switch_to_host_task(child);
+		child->state = TASK_UNINTERRUPTIBLE;
+		/* Switch back to the calling task before we return. */
+		switch_to_host_task(task);
+	}
 	lkl_cpu_put();
 
 	LKL_TRACE("done (no=%li task=%s current=%s ret=%i)\n", no,
diff --git a/arch/lkl/kernel/threads.c b/arch/lkl/kernel/threads.c
index a5d1dc24db86e9..501eaa899047e7 100644
--- a/arch/lkl/kernel/threads.c
+++ b/arch/lkl/kernel/threads.c
@@ -74,14 +74,23 @@ static void kill_thread(struct thread_info *ti)
 		lkl_ops->sem_up(ti->sched_sem);
 		lkl_ops->thread_join(ti->tid);
 	} else {
-
+		/*
+		 * If this is a task backing a host thread created by clone, then we
+		 * need to destroy the associated host thread, but not exit LKL.
+		 */
+		if (test_ti_thread_flag(ti, TIF_CLONED_HOST_THREAD)) {
+			clear_ti_thread_flag(ti, TIF_CLONED_HOST_THREAD);
+			ti->dead = true;
+			BUG_ON(!lkl_ops->thread_destroy_host);
+			lkl_ops->thread_destroy_host(ti->tid, task_key);
+			ti->tid = 0;
 		/*
 		 * Check if the host thread was killed due to its deallocation when
 		 * the associated application thread terminated gracefully. If not,
 		 * the thread has terminated due to a SYS_exit or a signal. In this
 		 * case, we need to notify the host to initiate an LKL shutdown.
 		 */
-		if (!test_ti_thread_flag(ti, TIF_NO_TERMINATION)) {
+		} else if (!test_ti_thread_flag(ti, TIF_NO_TERMINATION)) {
 			int exit_code = task->exit_code;
 			int exit_status = exit_code >> 8;
 			int received_signal = exit_code & 255;
@@ -239,8 +248,8 @@ static void thread_bootstrap(void *_tba)
 	do_exit(0);
 }
 
-int copy_thread(unsigned long clone_flags, unsigned long esp,
-		unsigned long unused, struct task_struct *p)
+int copy_thread_tls(unsigned long clone_flags, unsigned long esp,
+		    unsigned long unused, struct task_struct *p, unsigned long tls)
 {
 	LKL_TRACE("enter\n");
 
@@ -252,6 +261,31 @@ int copy_thread(unsigned long clone_flags, unsigned long esp,
 		return 0;
 	}
 
+	/*
+	 * If we are creating a new userspace thread and are in the middle of a
+	 * system call, create a new host thread coupled with this task. The
+	 * second check is necessary because we also hit this path when lazily
+	 * binding a host thread to a new task on system call entry.
+	 */
+	void *pc = task_thread_info(current)->syscall_ret;
+	if (pc && !(p->flags & PF_KTHREAD)) {
+		/*
+		 * If we have host support for creating new threads with fine-grained
+		 * control over their initial state, use it to create a new host
+		 * thread.
+		 */
+		if (lkl_ops->thread_create_host) {
+			static unsigned long long clone_count = 0;
+			set_ti_thread_flag(ti, TIF_HOST_THREAD);
+			set_ti_thread_flag(ti, TIF_CLONED_HOST_THREAD);
+			ti->tid = lkl_ops->thread_create_host(pc, (void*)esp, (void*)tls, task_key, p);
+			snprintf(p->comm, sizeof(p->comm), "host_clone%llu", __sync_fetch_and_add(&clone_count, 1));
+			current_thread_info()->cloned_child = p;
+			return (ti->tid == 0) ? -ENOMEM : 0;
+		}
+		return -ENODEV;
+	}
+
 	tba = kmalloc(sizeof(*tba), GFP_KERNEL);
 	if (!tba)
 		return -ENOMEM;
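For context, a rough sketch of how a POSIX/pthreads host might back the new thread_create_host operation is shown below. This is not part of the patch: struct host_clone_args, host_clone_trampoline, and the x86-64-only stack switch are invented for illustration, the sketch assumes the POSIX host's struct lkl_tls_key wraps a pthread_key_t and that lkl_thread_t is an unsigned long, and a real implementation would also have to install the TLS area and the child's full register state (return value, callee-saved registers) before resuming at pc. A matching thread_destroy_host would release the thread without running the task_key destructor; that side is omitted here.

#include <pthread.h>
#include <stdlib.h>

/* Assumed shapes of the host-side types (illustrative only). */
struct lkl_tls_key { pthread_key_t key; };
typedef unsigned long lkl_thread_t;

/* Arguments handed to the new host thread (names are invented). */
struct host_clone_args {
	void *pc;			/* return address captured in lkl_syscall() */
	void *sp;			/* child stack passed to the clone() call */
	struct lkl_tls_key *task_key;	/* TLS slot holding the LKL task_struct */
	void *task_value;		/* the child's task_struct */
};

static void *host_clone_trampoline(void *arg)
{
	struct host_clone_args a = *(struct host_clone_args *)arg;

	free(arg);

	/*
	 * Bind the child task_struct to this host thread so that the next
	 * lkl_syscall() issued from it runs in the context of the cloned task
	 * instead of lazily allocating a fresh host task.
	 */
	pthread_setspecific(a.task_key->key, a.task_value);

#if defined(__x86_64__)
	/*
	 * Switch to the caller-provided stack and resume after the syscall.
	 * A real host must also set up the child's return value and TLS.
	 */
	__asm__ volatile("mov %0, %%rsp\n\t"
			 "jmp *%1"
			 : : "r"(a.sp), "r"(a.pc) : "memory");
#endif
	return NULL;	/* not reached on supported architectures */
}

static lkl_thread_t thread_create_host(void *pc, void *sp, void *tls,
				       struct lkl_tls_key *task_key,
				       void *task_value)
{
	pthread_t t;
	struct host_clone_args *a = malloc(sizeof(*a));

	(void)tls;	/* architecture-specific TLS installation omitted */
	if (!a)
		return 0;
	*a = (struct host_clone_args){ pc, sp, task_key, task_value };
	if (pthread_create(&t, NULL, host_clone_trampoline, a)) {
		free(a);
		return 0;
	}
	return (lkl_thread_t)t;
}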