119 changes: 117 additions & 2 deletions scheds/rust/scx_bpfland/src/bpf/main.bpf.c
@@ -94,6 +94,24 @@ volatile u64 nr_online_cpus;
*/
static u64 nr_cpu_ids;

/*
* Runtime throttling.
*
 * Throttle the CPUs by injecting @throttle_ns of idle time after every @slice_max of runtime.
*/
const volatile u64 throttle_ns;
static volatile bool cpus_throttled;

static inline bool is_throttled(void)
{
return READ_ONCE(cpus_throttled);
}

static inline void set_throttled(bool state)
{
WRITE_ONCE(cpus_throttled, state);
}
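
A quick note on the mechanism: this is a plain duty cycle, where all CPUs run for @slice_max and are then forced idle for @throttle_ns. A minimal sketch of the resulting effective CPU bandwidth (illustrative helper, not part of the patch; assumes throttle_ns > 0):

```c
/*
 * Illustrative only: effective CPU bandwidth under throttling, as a
 * percentage of wall-clock time.
 *
 * Example: slice_max = 20ms, throttle_ns = 5ms
 *   -> 100 * 20 / (20 + 5) = 80% of the CPU time remains available.
 */
static inline u64 throttle_duty_cycle_pct(u64 slice_max, u64 throttle_ns)
{
	return (100 * slice_max) / (slice_max + throttle_ns);
}
```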

/*
* Exit information.
*/
@@ -134,6 +152,20 @@ struct {
__type(value, struct numa_timer);
} numa_timer SEC(".maps");

/*
* Timer used to inject idle cycles when CPU throttling is enabled.
*/
struct throttle_timer {
struct bpf_timer timer;
};

struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 1);
__type(key, u32);
__type(value, struct throttle_timer);
} throttle_timer SEC(".maps");
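
A bpf_timer must be embedded in a map value, hence the single-entry ARRAY map wrapping it; the timer itself is initialized and armed from bpfland_init() further down in this diff. For completeness, a sketch of how such a map-resident timer could be stopped (hypothetical helper, not used by this patch):

```c
/*
 * Hypothetical helper (not part of the patch): cancel the throttle
 * timer, e.g. if throttling were ever made toggleable at runtime.
 */
static int throttle_timer_cancel(void)
{
	struct throttle_timer *t;
	u32 key = 0;

	t = bpf_map_lookup_elem(&throttle_timer, &key);
	if (!t)
		return -ESRCH;

	/* bpf_timer_cancel() returns 1 if the timer was armed, 0 if not. */
	return bpf_timer_cancel(&t->timer);
}
```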

/*
* Per-node context.
*/
@@ -781,6 +813,9 @@ s32 BPF_STRUCT_OPS(bpfland_select_cpu, struct task_struct *p,
bool is_idle = false;
s32 cpu;

if (is_throttled())
return prev_cpu;

cpu = pick_idle_cpu(p, prev_cpu, wake_flags, &is_idle);
if (is_idle) {
int node = __COMPAT_scx_bpf_cpu_node(cpu);
@@ -807,6 +842,9 @@ static bool kick_idle_cpu(const struct task_struct *p, const struct task_ctx *tc
s32 cpu = scx_bpf_task_cpu(p);
int node = __COMPAT_scx_bpf_cpu_node(cpu);

if (is_throttled())
return false;

/*
* No need to look for full-idle SMT cores if SMT is disabled.
*/
@@ -877,6 +915,12 @@ static bool try_direct_dispatch(struct task_struct *p, struct task_ctx *tctx,
return true;
}

/*
* Skip direct dispatch if the CPUs are forced to stay idle.
*/
if (is_throttled())
return false;

/*
* If ops.select_cpu() has been skipped, try direct dispatch.
*/
@@ -1066,6 +1110,12 @@ void BPF_STRUCT_OPS(bpfland_dispatch, s32 cpu, struct task_struct *prev)
{
int node = __COMPAT_scx_bpf_cpu_node(cpu);

/*
* Let the CPU go idle if the system is throttled.
*/
if (is_throttled())
return;

/*
* Consume regular tasks from the shared DSQ, transferring them to the
* local CPU DSQ.
@@ -1429,6 +1479,50 @@ static void init_cpuperf_target(void)
scx_bpf_put_cpumask(online_cpumask);
}

/*
* Throttle timer used to inject idle time across all the CPUs.
*/
static int throttle_timerfn(void *map, int *key, struct bpf_timer *timer)
{
bool throttled = is_throttled();
u64 flags, duration;
s32 cpu;
int err;

	/*
	 * If the CPUs are currently throttled, send a wakeup IPI
	 * (SCX_KICK_IDLE) to resume from the injected idle sleep and let
	 * the tasks run for @slice_max.
	 *
	 * Otherwise, send a preemption IPI (SCX_KICK_PREEMPT) to interrupt
	 * the running tasks and inject the @throttle_ns idle sleep.
	 */
if (throttled) {
flags = SCX_KICK_IDLE;
duration = slice_max;
} else {
flags = SCX_KICK_PREEMPT;
duration = throttle_ns;
}

/*
* Flip the throttled state.
*/
set_throttled(!throttled);

bpf_for(cpu, 0, nr_cpu_ids)
scx_bpf_kick_cpu(cpu, flags);

	/*
	 * Re-arm the duty-cycle timer, setting either the runtime or the
	 * idle-time duration.
	 */
err = bpf_timer_start(timer, duration, 0);
if (err)
scx_bpf_error("Failed to re-arm duty cycle timer");

return 0;
}
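
Two details worth spelling out. First, the kicks alone don't keep the CPUs idle: SCX_KICK_PREEMPT merely forces the running tasks back through the scheduler, and it is the is_throttled() bail-outs added to ops.select_cpu(), ops.dispatch() and the direct-dispatch helpers above that refuse to hand out new work, letting each CPU drop into idle. Second, the two timer phases alternate as in the sketch below (timings assume slice_max = 20ms and throttle_ns = 5ms, for illustration):

```c
/*
 * Illustrative timeline, assuming slice_max = 20ms, throttle_ns = 5ms:
 *
 * t =  0ms  timer armed for slice_max, cpus_throttled = false
 * t = 20ms  callback: set_throttled(true), SCX_KICK_PREEMPT all CPUs,
 *           re-arm for throttle_ns  -> CPUs drain and go idle
 * t = 25ms  callback: set_throttled(false), SCX_KICK_IDLE all CPUs,
 *           re-arm for slice_max    -> CPUs resume running tasks
 * t = 45ms  ...and so on, i.e. an ~80% duty cycle.
 */
```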

/*
* Refresh NUMA statistics.
*/
@@ -1545,21 +1639,42 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(bpfland_init)
if (err)
return err;

timer = bpf_map_lookup_elem(&throttle_timer, &key);
if (!timer) {
scx_bpf_error("Failed to lookup throttle timer");
return -ESRCH;
}

	/*
	 * Initialize and arm the throttle timer if CPU throttling is enabled.
	 */
if (throttle_ns) {
bpf_timer_init(timer, &throttle_timer, CLOCK_BOOTTIME);
bpf_timer_set_callback(timer, throttle_timerfn);
err = bpf_timer_start(timer, slice_max, 0);
if (err) {
scx_bpf_error("Failed to arm throttle timer");
return err;
}
}
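
Note that cpus_throttled starts out false and the timer is first armed for @slice_max, so the scheduler always begins with a full run phase; the first expiry then flips into the idle phase, as described for throttle_timerfn() above.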

/* Do not update NUMA statistics if there's only one node */
if (numa_disabled || __COMPAT_scx_bpf_nr_node_ids() <= 1)
return 0;

timer = bpf_map_lookup_elem(&numa_timer, &key);
if (!timer) {
scx_bpf_error("Failed to lookup central timer");
scx_bpf_error("Failed to lookup NUMA timer");
return -ESRCH;
}

bpf_timer_init(timer, &numa_timer, CLOCK_BOOTTIME);
bpf_timer_set_callback(timer, numa_timerfn);
err = bpf_timer_start(timer, NSEC_PER_SEC, 0);
if (err)
if (err) {
scx_bpf_error("Failed to start NUMA timer");
return err;
}

return 0;
}
13 changes: 12 additions & 1 deletion scheds/rust/scx_bpfland/src/main.rs
@@ -135,6 +135,13 @@ struct Opts {
#[clap(short = 'l', long, allow_hyphen_values = true, default_value = "20000")]
slice_us_lag: i64,

/// Throttle the running CPUs by periodically injecting idle cycles.
///
    /// This option can help extend battery life on portable devices and reduce heat, fan noise,
    /// and overall energy consumption (0 = disabled).
#[clap(short = 't', long, default_value = "0")]
throttle_us: u64,

/// Set CPU idle QoS resume latency in microseconds (-1 = disabled).
///
/// Setting a lower latency value makes CPUs less likely to enter deeper idle states, enhancing
@@ -303,12 +310,16 @@ impl<'a> Scheduler<'a> {
skel.maps.rodata_data.smt_enabled = smt_enabled;
skel.maps.rodata_data.numa_disabled = opts.disable_numa;
skel.maps.rodata_data.local_pcpu = opts.local_pcpu;
skel.maps.rodata_data.local_kthreads = opts.local_kthreads;
skel.maps.rodata_data.no_preempt = opts.no_preempt;
skel.maps.rodata_data.no_wake_sync = opts.no_wake_sync;
skel.maps.rodata_data.slice_max = opts.slice_us * 1000;
skel.maps.rodata_data.slice_min = opts.slice_us_min * 1000;
skel.maps.rodata_data.slice_lag = opts.slice_us_lag * 1000;
skel.maps.rodata_data.throttle_ns = opts.throttle_us * 1000;

        // Implicitly enable direct dispatch of per-CPU kthreads if CPU throttling is enabled:
        // per-CPU kthreads are pinned and can't run anywhere else, so delaying them behind the
        // injected idle time could stall critical kernel work (it's never a good idea to
        // throttle per-CPU kthreads).
skel.maps.rodata_data.local_kthreads = opts.local_kthreads || opts.throttle_us > 0;

// Set scheduler compatibility flags.
skel.maps.rodata_data.__COMPAT_SCX_PICK_IDLE_IN_NODE = *compat::SCX_PICK_IDLE_IN_NODE;
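
With the wiring above, the `-t` value flows straight into the BPF side as `throttle_ns = throttle_us * 1000`. As a usage sketch (the long flag name `--throttle-us` is assumed from clap's derivation of the `throttle_us` field): `scx_bpfland -t 5000` would inject 5 ms of idle time per period, which with a 20 ms maximum slice works out to roughly an 80% duty cycle.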