Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions rust/scx_utils/src/compat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,25 @@ pub fn cond_kprobe_enable<T>(sym: &str, prog_ptr: &OpenProgramImpl<T>) -> Result
Ok(false)
}

/// Enables autoload for a group of kprobe-style programs as an all-or-nothing set.
///
/// Each entry pairs a kernel symbol name with the program that hooks it. Every
/// symbol is looked up with `in_kallsyms` first; autoload is switched on for the
/// programs only when all of the symbols are found.
///
/// Returns `Ok(true)` when autoload was enabled for every program, and
/// `Ok(false)` (after logging a warning) as soon as one symbol is missing, in
/// which case no program is touched.
///
/// # Errors
///
/// Propagates any error returned by `in_kallsyms`.
pub fn cond_kprobes_enable<T>(kprobes: Vec<(&str, &OpenProgramImpl<T>)>) -> Result<bool> {
    // Check if all the symbols exist before touching any program, so the set
    // is never left partially enabled.
    for (sym, _) in &kprobes {
        if !in_kallsyms(sym)? {
            warn!("symbol {} is missing, kprobe not loaded", sym);
            return Ok(false);
        }
    }

    // Enable all the kprobes.
    for (_, ptr) in &kprobes {
        // SAFETY: the raw pointer is taken from a borrowed `OpenProgramImpl`,
        // so it is presumed to refer to a live libbpf program for the duration
        // of this call.
        unsafe {
            bpf_program__set_autoload(ptr.as_libbpf_object().as_ptr(), true);
        }
    }

    Ok(true)
}

pub fn cond_tracepoint_enable<T>(tracepoint: &str, prog_ptr: &OpenProgramImpl<T>) -> Result<bool> {
if tracepoint_exists(tracepoint)? {
unsafe {
Expand All @@ -258,6 +277,26 @@ pub fn cond_tracepoint_enable<T>(tracepoint: &str, prog_ptr: &OpenProgramImpl<T>

Ok(false)
}

/// Enables autoload for a group of tracepoint programs as an all-or-nothing set.
///
/// Each entry pairs a tracepoint name with the program that attaches to it.
/// Every tracepoint is checked with `tracepoint_exists` first; autoload is
/// switched on for the programs only when all of the tracepoints are present.
///
/// Returns `Ok(true)` when autoload was enabled for every program, and
/// `Ok(false)` (after logging a warning) as soon as one tracepoint is missing,
/// in which case no program is touched.
///
/// # Errors
///
/// Propagates any error returned by `tracepoint_exists`.
pub fn cond_tracepoints_enable<T>(tracepoints: Vec<(&str, &OpenProgramImpl<T>)>) -> Result<bool> {
    // Check if all the tracepoints exist before touching any program, so the
    // set is never left partially enabled.
    for (tp, _) in &tracepoints {
        if !tracepoint_exists(tp)? {
            warn!("tracepoint {} is missing, tracepoint not loaded", tp);
            return Ok(false);
        }
    }

    // Enable all the tracepoints.
    for (_, ptr) in &tracepoints {
        // SAFETY: the raw pointer is taken from a borrowed `OpenProgramImpl`,
        // so it is presumed to refer to a live libbpf program for the duration
        // of this call.
        unsafe {
            bpf_program__set_autoload(ptr.as_libbpf_object().as_ptr(), true);
        }
    }

    Ok(true)
}

pub fn is_sched_ext_enabled() -> io::Result<bool> {
let content = std::fs::read_to_string("/sys/kernel/sched_ext/state")?;

Expand Down
114 changes: 114 additions & 0 deletions scheds/rust/scx_lavd/src/bpf/lock.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,120 @@ static void reset_lock_futex_boost(struct task_ctx *taskc, struct cpu_ctx *cpuc)
* time slice, we assume futex_wake() is skipped.
* - We do not distinguish futex user addresses to lower the tracing burden.
*
* We trace either ftrace entries or tracepoint entries. Ftrace is low-overhead,
* but it does not provide stability, as function entries can disappear if
* functions are inlined according to specific kernel configurations. Hence,
* the BPF offers both ftrace and tracepoint, allowing userspace to make a
* decision based on availability.
*/

/*
* We trace the following futex calls:
* - int __futex_wait(u32 *uaddr, unsigned int flags, u32 val, struct hrtimer_sleeper *to, u32 bitset)
* - int futex_wait_multiple(struct futex_vector *vs, unsigned int count, struct hrtimer_sleeper *to)
* - int futex_wait_requeue_pi(u32 *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset, u32 *uaddr2)
*
* - int futex_wake(u32 *uaddr, unsigned int flags, int nr_wake, u32 bitset)
* - int futex_wake_op(u32 *uaddr1, unsigned int flags, u32 *uaddr2, int nr_wake, int nr_wake2, int op)
*
* - int futex_lock_pi(u32 *uaddr, unsigned int flags, ktime_t *time, int trylock)
* - int futex_unlock_pi(u32 *uaddr, unsigned int flags)
*/
struct futex_vector;
struct hrtimer_sleeper;

SEC("?fexit/__futex_wait")
int BPF_PROG(fexit___futex_wait, u32 *uaddr, unsigned int flags, u32 val,
	     struct hrtimer_sleeper *to, u32 bitset, int ret)
{
	/*
	 * Only a zero return is treated as a futex acquisition; any other
	 * return value leaves the boost counter untouched.
	 */
	if (ret != 0)
		return 0;

	inc_futex_boost();
	return 0;
}

SEC("?fexit/futex_wait_multiple")
int BPF_PROG(fexit_futex_wait_multiple, struct futex_vector *vs, unsigned int count, struct hrtimer_sleeper *to, int ret)
{
	/*
	 * On success, futex_wait_multiple() returns the index (>= 0) of the
	 * futex that was woken, and a negative errno on failure. Testing for
	 * zero alone would only count wake-ups on the first vector element.
	 */
	if (ret >= 0) {
		/*
		 * One of the waited futexes is acquired.
		 *
		 * We don't want to traverse futex_vector here, so individual
		 * futexes are not distinguished: every successful
		 * futex_waitv() call is counted once, as if all calls were
		 * for the same address. This is a conservative approximation
		 * boosting less.
		 */
		inc_futex_boost();
	}
	return 0;
}

SEC("?fexit/futex_wait_requeue_pi")
int BPF_PROG(fexit_futex_wait_requeue_pi, u32 *uaddr, unsigned int flags,
	     u32 val, ktime_t *abs_time, u32 bitset, u32 *uaddr2, int ret)
{
	/*
	 * Only a zero return is treated as a futex acquisition; any other
	 * return value leaves the boost counter untouched.
	 */
	if (ret != 0)
		return 0;

	inc_futex_boost();
	return 0;
}

SEC("?fexit/futex_wake")
int BPF_PROG(fexit_futex_wake, u32 *uaddr, unsigned int flags, int nr_wake,
	     u32 bitset, int ret)
{
	/*
	 * A negative return is not counted as a release; any non-negative
	 * return is treated as a futex being released.
	 */
	if (ret < 0)
		return 0;

	dec_futex_boost();
	return 0;
}


SEC("?fexit/futex_wake_op")
int BPF_PROG(fexit_futex_wake_op, u32 *uaddr1, unsigned int flags,
	     u32 *uaddr2, int nr_wake, int nr_wake2, int op, int ret)
{
	/*
	 * A negative return is not counted as a release; any non-negative
	 * return is treated as a futex being released.
	 */
	if (ret < 0)
		return 0;

	dec_futex_boost();
	return 0;
}

SEC("?fexit/futex_lock_pi")
int BPF_PROG(fexit_futex_lock_pi, u32 *uaddr, unsigned int flags,
	     ktime_t *time, int trylock, int ret)
{
	/*
	 * Only a zero return is treated as a futex acquisition; any other
	 * return value leaves the boost counter untouched.
	 */
	if (ret != 0)
		return 0;

	inc_futex_boost();
	return 0;
}

SEC("?fexit/futex_unlock_pi")
int BPF_PROG(fexit_futex_unlock_pi, u32 *uaddr, unsigned int flags, int ret)
{
	/*
	 * Only a zero return is treated as a futex release; any other
	 * return value leaves the boost counter untouched.
	 */
	if (ret != 0)
		return 0;

	dec_futex_boost();
	return 0;
}


/*
* We trace the following futex tracepoints:
* - sys_exit_futex
* - sys_exit_futex_wait
Expand Down
68 changes: 46 additions & 22 deletions scheds/rust/scx_lavd/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,29 +332,15 @@ impl<'a> Scheduler<'a> {
skel_builder.obj_builder.debug(opts.verbose > 0);
let mut skel = scx_ops_open!(skel_builder, open_object, lavd_ops)?;

// Enable autoloads for conditionally loaded things
// immediately after creating skel (because this is always before loading)
// Enable futex tracing using ftrace if available. If the ftrace is not
// available, use tracepoint, which is known to be slower than ftrace.
if !opts.no_futex_boost {
compat::cond_tracepoint_enable(
"syscalls:sys_enter_futex",
&skel.progs.rtp_sys_enter_futex,
)?;
compat::cond_tracepoint_enable(
"syscalls:sys_exit_futex",
&skel.progs.rtp_sys_exit_futex,
)?;
compat::cond_tracepoint_enable(
"syscalls:sys_exit_futex_wait",
&skel.progs.rtp_sys_exit_futex_wait,
)?;
compat::cond_tracepoint_enable(
"syscalls:sys_exit_futex_waitv",
&skel.progs.rtp_sys_exit_futex_waitv,
)?;
compat::cond_tracepoint_enable(
"syscalls:sys_exit_futex_wake",
&skel.progs.rtp_sys_exit_futex_wake,
)?;
if Self::attach_futex_ftraces(&mut skel)? == false {
info!("Fail to attach futex ftraces. Try with tracepoints.");
if Self::attach_futex_tracepoints(&mut skel)? == false {
info!("Fail to attach futex tracepoints.");
}
}
}

// Initialize CPU topology
Expand Down Expand Up @@ -393,6 +379,44 @@ impl<'a> Scheduler<'a> {
})
}

/// Requests autoload of the fexit-based futex tracing programs.
///
/// Pairs each traced kernel function name with its program in the skeleton
/// and hands the whole list to `compat::cond_kprobes_enable`, whose result
/// is returned unchanged.
fn attach_futex_ftraces(skel: &mut OpenBpfSkel) -> Result<bool> {
    let progs = &skel.progs;

    compat::cond_kprobes_enable(vec![
        ("__futex_wait", &progs.fexit___futex_wait),
        ("futex_wait_multiple", &progs.fexit_futex_wait_multiple),
        ("futex_wait_requeue_pi", &progs.fexit_futex_wait_requeue_pi),
        ("futex_wake", &progs.fexit_futex_wake),
        ("futex_wake_op", &progs.fexit_futex_wake_op),
        ("futex_lock_pi", &progs.fexit_futex_lock_pi),
        ("futex_unlock_pi", &progs.fexit_futex_unlock_pi),
    ])
}

/// Requests autoload of the tracepoint-based futex tracing programs.
///
/// Pairs each futex syscall tracepoint name with its program in the skeleton
/// and hands the whole list to `compat::cond_tracepoints_enable`, whose
/// result is returned unchanged.
fn attach_futex_tracepoints(skel: &mut OpenBpfSkel) -> Result<bool> {
    let progs = &skel.progs;

    compat::cond_tracepoints_enable(vec![
        ("syscalls:sys_enter_futex", &progs.rtp_sys_enter_futex),
        ("syscalls:sys_exit_futex", &progs.rtp_sys_exit_futex),
        ("syscalls:sys_exit_futex_wait", &progs.rtp_sys_exit_futex_wait),
        ("syscalls:sys_exit_futex_waitv", &progs.rtp_sys_exit_futex_waitv),
        ("syscalls:sys_exit_futex_wake", &progs.rtp_sys_exit_futex_wake),
    ])
}

fn init_cpus(skel: &mut OpenBpfSkel, order: &CpuOrder) {
debug!("{:#?}", order);

Expand Down