Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 2 additions & 128 deletions scheds/rust/scx_flash/src/bpf/main.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,6 @@ const volatile u64 slice_lag = 20ULL * NSEC_PER_MSEC;
*/
const volatile bool local_kthreads;

/*
 * When enabled, a task that holds a user-space lock and is still able to
 * run is given another time slice before any other task is selected.
 * See flash_dispatch() for details.
 */
const volatile bool user_lock_boost;

/*
* Scheduling statistics.
*/
Expand Down Expand Up @@ -153,122 +147,13 @@ struct task_ctx *try_lookup_task_ctx(const struct task_struct *p)
(struct task_struct *)p, 0, 0);
}

/*
 * User-space locking detection: reuses logic similar to scx_lavd.
 *
 * The types below are only referenced through pointers in the program
 * signatures that follow, so forward declarations are sufficient.
 */
struct futex_vector;
struct hrtimer_sleeper;
struct file;

/*
 * Return true if @p is currently flagged as lock-boosted, false otherwise
 * (including when @p has no task context).
 */
static bool is_task_locked(struct task_struct *p)
{
	struct task_ctx *ctx = try_lookup_task_ctx(p);

	if (!ctx)
		return false;

	return ctx->lock_boost;
}

/*
 * Flag @p as lock-boosted. Does nothing when @p has no task context.
 */
static void task_lock(struct task_struct *p)
{
	struct task_ctx *ctx = try_lookup_task_ctx(p);

	if (!ctx)
		return;

	ctx->lock_boost = true;
}

/*
 * Clear the lock-boosted flag of @p. Does nothing when @p has no task
 * context.
 */
static void task_unlock(struct task_struct *p)
{
	struct task_ctx *ctx = try_lookup_task_ctx(p);

	if (!ctx)
		return;

	ctx->lock_boost = false;
}

/*
 * Exit hook for __futex_wait(): when the call returned 0, flag the current
 * task as lock-boosted. Any other return value leaves the flag untouched.
 */
SEC("fexit/__futex_wait")
int BPF_PROG(fexit___futex_wait, u32 *uaddr, unsigned int flags, u32 val,
	     struct hrtimer_sleeper *to, u32 bitset, int ret)
{
	if (ret != 0)
		return 0;

	task_lock((void *)bpf_get_current_task_btf());

	return 0;
}

/*
 * Exit hook for futex_wait_multiple(): flag the current task as
 * lock-boosted after a successful wait.
 *
 * On success futex_wait_multiple() returns the index (>= 0) of the futex
 * that was woken, and a negative errno on failure. Checking for "== 0"
 * would only catch wake-ups on the first element of the vector, so any
 * non-negative value is treated as success.
 */
SEC("fexit/futex_wait_multiple")
int BPF_PROG(fexit_futex_wait_multiple, struct futex_vector *vs,
	     unsigned int count, struct hrtimer_sleeper *to, int ret)
{
	if (ret >= 0) {
		struct task_struct *p = (void *)bpf_get_current_task_btf();

		task_lock(p);
	}
	return 0;
}

/*
 * Exit hook for futex_wait_requeue_pi(): when the call returned 0, flag the
 * current task as lock-boosted. Any other return value leaves the flag
 * untouched.
 */
SEC("fexit/futex_wait_requeue_pi")
int BPF_PROG(fexit_futex_wait_requeue_pi, u32 *uaddr, unsigned int flags,
	     u32 val, ktime_t *abs_time, u32 bitset, u32 *uaddr2, int ret)
{
	if (ret != 0)
		return 0;

	task_lock((void *)bpf_get_current_task_btf());

	return 0;
}

/*
 * Exit hook for futex_lock_pi(): when the call returned 0, flag the current
 * task as lock-boosted. Any other return value leaves the flag untouched.
 */
SEC("fexit/futex_lock_pi")
int BPF_PROG(fexit_futex_lock_pi, u32 *uaddr, unsigned int flags,
	     ktime_t *time, int trylock, int ret)
{
	if (ret != 0)
		return 0;

	task_lock((void *)bpf_get_current_task_btf());

	return 0;
}

/*
 * Exit hook for futex_wake(): clear the lock-boosted flag of the current
 * task once it has released a futex.
 *
 * On success futex_wake() returns the number of waiters that were woken
 * (>= 0), and a negative errno on failure. Checking for "== 0" would only
 * clear the flag when nobody was waiting and miss the contended case, so
 * any non-negative value is treated as a successful release.
 */
SEC("fexit/futex_wake")
int BPF_PROG(fexit_futex_wake, u32 *uaddr, unsigned int flags,
	     int nr_wake, u32 bitset, int ret)
{
	if (ret >= 0) {
		struct task_struct *p = (void *)bpf_get_current_task_btf();

		task_unlock(p);
	}
	return 0;
}

/*
 * Exit hook for futex_wake_op(): clear the lock-boosted flag of the current
 * task once it has released a futex.
 *
 * On success futex_wake_op() returns the total number of waiters woken on
 * both futexes (>= 0), and a negative errno on failure. Checking for
 * "== 0" would miss every call that actually woke a waiter, so any
 * non-negative value is treated as a successful release.
 */
SEC("fexit/futex_wake_op")
int BPF_PROG(fexit_futex_wake_op, u32 *uaddr1, unsigned int flags,
	     u32 *uaddr2, int nr_wake, int nr_wake2, int op, int ret)
{
	if (ret >= 0) {
		struct task_struct *p = (void *)bpf_get_current_task_btf();

		task_unlock(p);
	}
	return 0;
}

/*
 * Exit hook for futex_unlock_pi(): when the call returned 0, clear the
 * lock-boosted flag of the current task. Any other return value leaves the
 * flag untouched.
 */
SEC("fexit/futex_unlock_pi")
int BPF_PROG(fexit_futex_unlock_pi, u32 *uaddr, unsigned int flags, int ret)
{
	if (ret != 0)
		return 0;

	task_unlock((void *)bpf_get_current_task_btf());

	return 0;
}

/*
* Prevent excessive prioritization of tasks performing massive fsync()
* operations on the filesystem. Such tasks are not inherently
* latency-sensitive, and prioritizing them can degrade system responsiveness.
*/
SEC("kprobe/vfs_fsync_range")
int BPF_PROG(fexit_vfs_fsync_range, struct file *file, u64 start, u64 end, int datasync)
SEC("?kprobe/vfs_fsync_range")
int kprobe_vfs_fsync_range(struct file *file, u64 start, u64 end, int datasync)
{
struct task_struct *p = (void *)bpf_get_current_task_btf();
struct task_ctx *tctx;
Expand Down Expand Up @@ -822,17 +707,6 @@ void BPF_STRUCT_OPS(flash_enqueue, struct task_struct *p, u64 enq_flags)

void BPF_STRUCT_OPS(flash_dispatch, s32 cpu, struct task_struct *prev)
{
/*
* If the task can still run and it's holding a user-space lock, let it
* run for another round.
*/
if (user_lock_boost && prev && (prev->scx.flags & SCX_TASK_QUEUED) &&
is_task_locked(prev)) {
task_unlock(prev);
task_refill_slice(prev);
return;
}

/*
* Select a new task to run.
*/
Expand Down
27 changes: 18 additions & 9 deletions scheds/rust/scx_flash/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,18 @@ use anyhow::Context;
use anyhow::Result;
use clap::Parser;
use crossbeam::channel::RecvTimeoutError;
use libbpf_rs::libbpf_sys::bpf_program__set_autoload;
use libbpf_rs::skel::OpenSkel;
use libbpf_rs::skel::Skel;
use libbpf_rs::skel::SkelBuilder;
use libbpf_rs::AsRawLibbpf;
use libbpf_rs::OpenObject;
use libbpf_rs::ProgramInput;
use log::info;
use log::warn;
use scx_stats::prelude::*;
use scx_utils::build_id;
use scx_utils::compat;
use scx_utils::import_enums;
use scx_utils::scx_enums;
use scx_utils::scx_ops_attach;
Expand Down Expand Up @@ -74,14 +77,6 @@ struct Opts {
#[clap(short = 'k', long, action = clap::ArgAction::SetTrue)]
local_kthreads: bool,

/// Enable user-space lock owner prioritization.
///
/// Enabling this can improve workload performance when the workload tends to trigger
/// large amount of futex() syscall. For some workloads such as database transaction,
/// it might not be beneficial and even degrade the performance.
#[clap(short = 'u', long, action = clap::ArgAction::SetTrue)]
user_lock_boost: bool,

/// Enable stats monitoring with the specified interval.
#[clap(long)]
stats: Option<f64>,
Expand Down Expand Up @@ -147,10 +142,24 @@ impl<'a> Scheduler<'a> {
skel.maps.rodata_data.slice_max = opts.slice_us_max * 1000;
skel.maps.rodata_data.slice_lag = opts.slice_us_lag * 1000;
skel.maps.rodata_data.local_kthreads = opts.local_kthreads;
skel.maps.rodata_data.user_lock_boost = opts.user_lock_boost;

skel.maps.rodata_data.smt_enabled = smt_enabled;

// Conditionally load the kprobes used by the scheduler.
if compat::ksym_exists("vfs_fsync_range").unwrap_or(false) {
unsafe {
bpf_program__set_autoload(
skel.progs
.kprobe_vfs_fsync_range
.as_libbpf_object()
.as_ptr(),
true,
);
}
} else {
warn!("vfs_fsync_range symbol is missing")
}

// Load the BPF program for validation.
let mut skel = scx_ops_load!(skel, flash_ops, uei)?;

Expand Down