Skip to content
This repository has been archived by the owner on Jun 18, 2024. It is now read-only.

Allow clean exits from the scheduler with scx_bpf_exit() #166

Merged
merged 4 commits into from
Mar 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion include/linux/sched/ext.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ enum scx_exit_kind {
SCX_EXIT_NONE,
SCX_EXIT_DONE,

SCX_EXIT_UNREG = 64, /* BPF unregistration */
SCX_EXIT_UNREG = 64, /* User-space initiated unregistration */
SCX_EXIT_UNREG_BPF, /* BPF-initiated unregistration */
SCX_EXIT_SYSRQ, /* requested by 'S' sysrq */

SCX_EXIT_ERROR = 1024, /* runtime error, error msg contains details */
Expand All @@ -72,6 +73,9 @@ struct scx_exit_info {
/* %SCX_EXIT_* - broad category of the exit reason */
enum scx_exit_kind kind;

/* exit code if gracefully exiting from BPF */
s64 exit_code;

/* textual representation of the above */
const char *reason;

Expand Down
66 changes: 50 additions & 16 deletions kernel/sched/ext.c
Original file line number Diff line number Diff line change
Expand Up @@ -3304,7 +3304,9 @@ static const char *scx_exit_reason(enum scx_exit_kind kind)
{
switch (kind) {
case SCX_EXIT_UNREG:
return "BPF scheduler unregistered";
return "Scheduler unregistered from user space";
case SCX_EXIT_UNREG_BPF:
return "Scheduler unregistered from BPF";
case SCX_EXIT_SYSRQ:
return "disabled by sysrq-S";
case SCX_EXIT_ERROR:
Expand Down Expand Up @@ -3606,8 +3608,9 @@ static void scx_ops_error_irq_workfn(struct irq_work *irq_work)

static DEFINE_IRQ_WORK(scx_ops_error_irq_work, scx_ops_error_irq_workfn);

__printf(2, 3) void scx_ops_error_kind(enum scx_exit_kind kind,
const char *fmt, ...)
__printf(3, 4) void scx_ops_exit_kind(enum scx_exit_kind kind,
s64 exit_code,
const char *fmt, ...)
{
struct scx_exit_info *ei = scx_exit_info;
int none = SCX_EXIT_NONE;
Expand All @@ -3616,12 +3619,15 @@ __printf(2, 3) void scx_ops_error_kind(enum scx_exit_kind kind,
if (!atomic_try_cmpxchg(&scx_exit_kind, &none, kind))
return;

ei->bt_len = stack_trace_save(ei->bt, SCX_EXIT_BT_LEN, 1);
if (kind >= SCX_EXIT_ERROR)
ei->bt_len = stack_trace_save(ei->bt, SCX_EXIT_BT_LEN, 1);

va_start(args, fmt);
vscnprintf(ei->msg, SCX_EXIT_MSG_LEN, fmt, args);
va_end(args);

ei->exit_code = exit_code;

/*
* Set ei->kind and ->reason for scx_dump_state(). They'll be set again
* in scx_ops_disable_workfn().
Expand Down Expand Up @@ -5059,17 +5065,9 @@ struct scx_bpf_error_bstr_bufs {

static DEFINE_PER_CPU(struct scx_bpf_error_bstr_bufs, scx_bpf_error_bstr_bufs);

/**
* scx_bpf_error_bstr - Indicate fatal error
* @fmt: error message format string
* @data: format string parameters packaged using ___bpf_fill() macro
* @data__sz: @data len, must end in '__sz' for the verifier
*
* Indicate that the BPF scheduler encountered a fatal error and initiate ops
* disabling.
*/
__bpf_kfunc void scx_bpf_error_bstr(char *fmt, unsigned long long *data,
u32 data__sz)
static void bpf_exit_bstr_common(enum scx_exit_kind kind, s64 exit_code,
char *fmt, unsigned long long *data,
u32 data__sz)
{
struct bpf_bprintf_data bprintf_data = { .get_bin_args = true };
struct scx_bpf_error_bstr_bufs *bufs;
Expand Down Expand Up @@ -5108,9 +5106,44 @@ __bpf_kfunc void scx_bpf_error_bstr(char *fmt, unsigned long long *data,
goto out_restore;
}

scx_ops_error_kind(SCX_EXIT_ERROR_BPF, "%s", bufs->msg);
scx_ops_exit_kind(kind, exit_code, "%s", bufs->msg);
out_restore:
local_irq_restore(flags);

}

/**
 * scx_bpf_exit_bstr - Gracefully exit the BPF scheduler.
 * @exit_code: Exit value to pass to user space via struct scx_exit_info.
 * @fmt: exit message format string
 * @data: format string parameters packaged using ___bpf_fill() macro
 * @data__sz: @data len, must end in '__sz' for the verifier
 *
 * Request a graceful, BPF-initiated exit. The request is forwarded to the
 * common bstr exit path as %SCX_EXIT_UNREG_BPF along with @exit_code, which
 * initiates ops disabling.
 */
__bpf_kfunc void scx_bpf_exit_bstr(s64 exit_code, char *fmt,
				   unsigned long long *data, u32 data__sz)
{
	const enum scx_exit_kind kind = SCX_EXIT_UNREG_BPF;

	bpf_exit_bstr_common(kind, exit_code, fmt, data, data__sz);
}

/**
 * scx_bpf_error_bstr - Indicate fatal error
 * @fmt: error message format string
 * @data: format string parameters packaged using ___bpf_fill() macro
 * @data__sz: @data len, must end in '__sz' for the verifier
 *
 * Report that the BPF scheduler hit a fatal error. The request is forwarded
 * to the common bstr exit path as %SCX_EXIT_ERROR_BPF with an exit code of 0,
 * which initiates ops disabling.
 */
__bpf_kfunc void scx_bpf_error_bstr(char *fmt, unsigned long long *data,
				    u32 data__sz)
{
	const enum scx_exit_kind kind = SCX_EXIT_ERROR_BPF;
	const s64 no_exit_code = 0;

	bpf_exit_bstr_common(kind, no_exit_code, fmt, data, data__sz);
}

/**
Expand Down Expand Up @@ -5200,6 +5233,7 @@ BTF_ID_FLAGS(func, scx_bpf_get_idle_cpumask, KF_ACQUIRE)
BTF_ID_FLAGS(func, scx_bpf_get_idle_smtmask, KF_ACQUIRE)
BTF_ID_FLAGS(func, scx_bpf_put_idle_cpumask, KF_RELEASE)
BTF_ID_FLAGS(func, scx_bpf_error_bstr, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, scx_bpf_exit_bstr, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, scx_bpf_task_running, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_task_cpu, KF_RCU)
#ifdef CONFIG_CGROUP_SCHED
Expand Down
8 changes: 6 additions & 2 deletions kernel/sched/ext.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,12 @@ int scx_check_setscheduler(struct task_struct *p, int policy);
bool scx_can_stop_tick(struct rq *rq);
void init_sched_ext_class(void);

__printf(2, 3) void scx_ops_error_kind(enum scx_exit_kind kind,
const char *fmt, ...);
__printf(3, 4) void scx_ops_exit_kind(enum scx_exit_kind kind,
s64 exit_code,
const char *fmt, ...);
#define scx_ops_error_kind(__err, fmt, args...) \
scx_ops_exit_kind(__err, 0, fmt, ##args)

#define scx_ops_error(fmt, args...) \
scx_ops_error_kind(SCX_EXIT_ERROR, fmt, ##args)

Expand Down
51 changes: 38 additions & 13 deletions tools/sched_ext/include/scx/common.bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,28 +29,53 @@ static inline void ___vmlinux_h_sanity_check___(void)
}

void scx_bpf_error_bstr(char *fmt, unsigned long long *data, u32 data_len) __ksym;
void scx_bpf_exit_bstr(s64 exit_code, char *fmt,
unsigned long long *data, u32 data__sz) __ksym;

static inline __attribute__((format(printf, 1, 2)))
void ___scx_bpf_error_format_checker(const char *fmt, ...) {}
void ___scx_bpf_exit_format_checker(const char *fmt, ...) {}

/*
 * Helper macro for initializing the fmt and variadic argument inputs to both
 * bstr exit kfuncs. Users of this macro should refer to ___fmt (the format
 * string) and ___param (the arguments packed as unsigned long long) when
 * calling the bstr kfunc. Because the macro expands to declarations, it has to
 * be used inside a block, before the kfunc call that consumes them.
 */
#define scx_bpf_exit_preamble(fmt, args...) \
static char ___fmt[] = fmt; \
/* \
* Note that ___param[] must have at least one \
* element to keep the verifier happy. \
*/ \
unsigned long long ___param[___bpf_narg(args) ?: 1] = {}; \
\
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
___bpf_fill(___param, args); \
_Pragma("GCC diagnostic pop") \

/*
 * scx_bpf_exit() wraps the scx_bpf_exit_bstr() kfunc with variadic arguments
 * instead of an array of u64. Using this macro will cause the scheduler to
 * exit cleanly with the specified exit code being passed to user space.
 *
 * @code: exit code handed to scx_bpf_exit_bstr()
 * @fmt: printf-style format for the exit message; it initializes a char array
 *       in scx_bpf_exit_preamble(), so it has to be a string literal
 * @args: arguments consumed by @fmt
 *
 * The final ___scx_bpf_exit_format_checker() call is there so the compiler
 * can check @args against @fmt.
 */
#define scx_bpf_exit(code, fmt, args...) \
({ \
scx_bpf_exit_preamble(fmt, args) \
scx_bpf_exit_bstr(code, ___fmt, ___param, sizeof(___param)); \
___scx_bpf_exit_format_checker(fmt, ##args); \
})

/*
* scx_bpf_error() wraps the scx_bpf_error_bstr() kfunc with variadic arguments
* instead of an array of u64. Note that __param[] must have at least one
* element to keep the verifier happy.
* instead of an array of u64. Invoking this macro will cause the scheduler to
* exit in an erroneous state, with diagnostic information being passed to the
* user.
*/
#define scx_bpf_error(fmt, args...) \
({ \
static char ___fmt[] = fmt; \
unsigned long long ___param[___bpf_narg(args) ?: 1] = {}; \
\
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
___bpf_fill(___param, args); \
_Pragma("GCC diagnostic pop") \
\
scx_bpf_exit_preamble(fmt, args) \
scx_bpf_error_bstr(___fmt, ___param, sizeof(___param)); \
\
___scx_bpf_error_format_checker(fmt, ##args); \
___scx_bpf_exit_format_checker(fmt, ##args); \
})

s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym;
Expand Down
3 changes: 3 additions & 0 deletions tools/sched_ext/include/scx/user_exit_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ enum uei_sizes {

/*
 * Exit information copied out of the kernel's struct scx_exit_info by the
 * recording macro in this header.
 */
struct user_exit_info {
/* exit kind; the recording macro stores (__ei)->kind here after the other fields */
int kind;
/* copied from (__ei)->exit_code when the running kernel has that field */
s64 exit_code;
/* NOTE(review): presumably mirrors scx_exit_info.reason - confirm against the recording macro */
char reason[UEI_REASON_LEN];
/* message text taken from (__ei)->msg, bounded by sizeof(msg) */
char msg[UEI_MSG_LEN];
};
Expand All @@ -39,6 +40,8 @@ struct user_exit_info {
sizeof(__uei_name.msg), (__ei)->msg); \
bpf_probe_read_kernel_str(__uei_name##_dump, \
__uei_name##_dump_len, (__ei)->dump); \
if (bpf_core_field_exists((__ei)->exit_code)) \
__uei_name.exit_code = (__ei)->exit_code; \
/* use __sync to force memory barrier */ \
__sync_val_compare_and_swap(&__uei_name.kind, __uei_name.kind, \
(__ei)->kind); \
Expand Down
1 change: 1 addition & 0 deletions tools/testing/selftests/sched_ext/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ auto-test-targets := \
enq_select_cpu_fails \
ddsp_bogus_dsq_fail \
ddsp_vtimelocal_fail \
exit \
init_enable_count \
maximal \
maybe_null \
Expand Down
84 changes: 84 additions & 0 deletions tools/testing/selftests/sched_ext/exit.bpf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
* Copyright (c) 2024 David Vernet <dvernet@meta.com>
*/

#include <scx/common.bpf.h>

char _license[] SEC("license") = "GPL";

#include "exit_test.h"

const volatile int exit_point;
UEI_DEFINE(uei);

#define EXIT_CLEANLY() scx_bpf_exit(exit_point, "%d", exit_point)

/*
 * ops.select_cpu(): request a clean exit when this is the selected exit
 * point, then defer to the default CPU selection.
 */
s32 BPF_STRUCT_OPS(exit_select_cpu, struct task_struct *p,
		   s32 prev_cpu, u64 wake_flags)
{
	/* Output of the default selection; never read by this test. */
	bool ignored;
	s32 cpu;

	if (exit_point == EXIT_SELECT_CPU)
		EXIT_CLEANLY();

	cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &ignored);

	return cpu;
}

/*
 * ops.enqueue(): request a clean exit when this is the selected exit point,
 * then dispatch @p to the global DSQ with the default slice.
 */
void BPF_STRUCT_OPS(exit_enqueue, struct task_struct *p, u64 enq_flags)
{
	const bool exit_here = exit_point == EXIT_ENQUEUE;

	if (exit_here)
		EXIT_CLEANLY();

	scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
}

/*
 * ops.dispatch(): request a clean exit when this is the selected exit point,
 * then consume from the global DSQ.
 */
void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p)
{
	const bool exit_here = exit_point == EXIT_DISPATCH;

	if (exit_here)
		EXIT_CLEANLY();

	scx_bpf_consume(SCX_DSQ_GLOBAL);
}

/* ops.enable(): request a clean exit when this is the selected exit point. */
void BPF_STRUCT_OPS(exit_enable, struct task_struct *p)
{
	if (exit_point != EXIT_ENABLE)
		return;

	EXIT_CLEANLY();
}

/*
 * ops.init_task(): request a clean exit when this is the selected exit point.
 * Always reports success.
 */
s32 BPF_STRUCT_OPS(exit_init_task, struct task_struct *p,
		   struct scx_init_task_args *args)
{
	if (exit_point != EXIT_INIT_TASK)
		return 0;

	EXIT_CLEANLY();
	return 0;
}

/* ops.exit(): record the exit info from @ei into uei for the user-space test to read. */
void BPF_STRUCT_OPS(exit_exit, struct scx_exit_info *ei)
{
UEI_RECORD(uei, ei);
}

/*
 * ops.init(): request a clean exit when this is the selected exit point.
 * Always reports success.
 */
s32 BPF_STRUCT_OPS_SLEEPABLE(exit_init)
{
	const s32 ret = 0;

	if (exit_point == EXIT_INIT)
		EXIT_CLEANLY();

	return ret;
}

/* Scheduler definition wiring every callback that can act as an exit point. */
SEC(".struct_ops.link")
struct sched_ext_ops exit_ops = {
	.name			= "exit",
	.timeout_ms		= 1000U,

	/* setup and teardown */
	.init			= exit_init,
	.init_task		= exit_init_task,
	.enable			= exit_enable,
	.exit			= exit_exit,

	/* scheduling paths */
	.select_cpu		= exit_select_cpu,
	.enqueue		= exit_enqueue,
	.dispatch		= exit_dispatch,
};
55 changes: 55 additions & 0 deletions tools/testing/selftests/sched_ext/exit.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
* Copyright (c) 2024 David Vernet <dvernet@meta.com>
*/
#include <bpf/bpf.h>
#include <sched.h>
#include <scx/common.h>
#include <sys/wait.h>
#include <unistd.h>
#include "exit.bpf.skel.h"
#include "scx_test.h"

#include "exit_test.h"

/*
 * Exercise a clean, BPF-initiated exit from every supported exit point.
 *
 * For each value in [0, NUM_EXITS) a fresh skeleton is opened, exit_point is
 * set to that value, and the scheduler is loaded and attached. The test then
 * spins until the BPF side records an exit kind and checks that the recorded
 * kind, exit code and message all match the requested exit point.
 *
 * @ctx: unused.
 *
 * Returns SCX_TEST_PASS once every exit point has been verified, or
 * SCX_TEST_FAIL if the skeleton cannot be opened, loaded or attached.
 */
static enum scx_test_status run(void *ctx)
{
	enum exit_test_case tc;

	for (tc = 0; tc < NUM_EXITS; tc++) {
		struct exit *skel;
		struct bpf_link *link;
		char buf[16];

		skel = exit__open();
		if (!skel) {
			/* Don't dereference a skeleton that failed to open. */
			SCX_ERR("Failed to open skel");
			return SCX_TEST_FAIL;
		}

		/* Choose the exit point before the program is loaded. */
		skel->rodata->exit_point = tc;

		if (exit__load(skel)) {
			/* Report load failures as such, not as attach failures. */
			SCX_ERR("Failed to load skel");
			exit__destroy(skel);
			return SCX_TEST_FAIL;
		}

		link = bpf_map__attach_struct_ops(skel->maps.exit_ops);
		if (!link) {
			SCX_ERR("Failed to attach scheduler");
			exit__destroy(skel);
			return SCX_TEST_FAIL;
		}

		/* Assumes uei.kind is written last */
		while (skel->data->uei.kind == SCX_EXIT_NONE)
			sched_yield();

		SCX_EQ(skel->data->uei.kind, SCX_EXIT_UNREG_BPF);
		SCX_EQ(skel->data->uei.exit_code, tc);

		/* The BPF side formats exit_point with "%d" as the exit message. */
		snprintf(buf, sizeof(buf), "%d", tc);
		SCX_ASSERT(!strcmp(skel->data->uei.msg, buf));

		bpf_link__destroy(link);
		exit__destroy(skel);
	}

	return SCX_TEST_PASS;
}

/* Test descriptor handed to the selftest runner via REGISTER_SCX_TEST(). */
struct scx_test exit_test = {
	.run		= run,
	.name		= "exit",
	.description	= "Verify we can cleanly exit a scheduler in multiple places",
};
REGISTER_SCX_TEST(&exit_test)
Loading