From 91f49a1aa00e9010d3a8cd2e9bc52a14497765df Mon Sep 17 00:00:00 2001 From: Daniel Hodges Date: Wed, 28 May 2025 05:58:26 -0700 Subject: [PATCH] scx_p2dq: Fix stalls in affinitized tasks When selecting an idle CPU for an affinitized task, reset the temporary CPU mask to the intersection of the LLC's CPUs and the task's allowed CPUs before using it to find an idle CPU in the LLC. This fixes stalls in affinitized task handling under heavy load. Signed-off-by: Daniel Hodges --- scheds/rust/scx_p2dq/src/bpf/main.bpf.c | 31 +++++++++++++++++-------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/scheds/rust/scx_p2dq/src/bpf/main.bpf.c b/scheds/rust/scx_p2dq/src/bpf/main.bpf.c index d25a941640..c01e689f6b 100644 --- a/scheds/rust/scx_p2dq/src/bpf/main.bpf.c +++ b/scheds/rust/scx_p2dq/src/bpf/main.bpf.c @@ -534,6 +534,10 @@ static s32 pick_idle_affinitized_cpu(struct task_struct *p, task_ctx *taskc, } } + if (llcx->cpumask) + bpf_cpumask_and(mask, cast_mask(llcx->cpumask), + p->cpus_ptr); + // Next try to find an idle CPU in the LLC cpu = scx_bpf_pick_idle_cpu(cast_mask(mask), 0); if (cpu >= 0) { @@ -797,10 +801,13 @@ static __always_inline void async_p2dq_enqueue(struct enqueue_promise *ret, * Per-cpu kthreads are considered interactive and dispatched directly * into the local DSQ. 
*/ - if ((p->flags & PF_KTHREAD) && p->cpus_ptr == &p->cpus_mask && p->nr_cpus_allowed != nr_cpus && + if ((p->flags & PF_KTHREAD) && + p->cpus_ptr == &p->cpus_mask && + p->nr_cpus_allowed != nr_cpus && + bpf_cpumask_test_cpu(cpu, p->cpus_ptr) && kthreads_local) { stat_inc(P2DQ_STAT_DIRECT); - scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, dsq_time_slices[0], enq_flags); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON|cpu, dsq_time_slices[0], enq_flags); ret->kind = P2DQ_ENQUEUE_PROMISE_COMPLETE; return; } @@ -812,15 +819,19 @@ static __always_inline void async_p2dq_enqueue(struct enqueue_promise *ret, } // Handle affinitized tasks separately - if (!taskc->all_cpus) { + if (!taskc->all_cpus || + (p->cpus_ptr == &p->cpus_mask && + p->nr_cpus_allowed != nr_cpus)) { bool is_idle = false; - cpu = pick_idle_affinitized_cpu(p, taskc, cpu, &is_idle); - if (!(cpuc = lookup_cpu_ctx(cpu)) || - !(llcx = lookup_llc_ctx(cpuc->llc_id))) { - scx_bpf_error("invalid lookup"); - ret->kind = P2DQ_ENQUEUE_PROMISE_COMPLETE; - return; - } + if (!bpf_cpumask_test_cpu(cpu, p->cpus_ptr)) + cpu = pick_idle_affinitized_cpu(p, taskc, cpu, &is_idle); + + // if (!(cpuc = lookup_cpu_ctx(cpu)) || + // !(llcx = lookup_llc_ctx(cpuc->llc_id))) { + // scx_bpf_error("invalid lookup"); + // ret->kind = P2DQ_ENQUEUE_PROMISE_COMPLETE; + // return; + // } scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON|cpu, taskc->slice_ns, enq_flags); if (is_idle) {