Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions scheds/rust/scx_mitosis/src/bpf/mitosis.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,10 @@ static inline int allocate_cell()
if (!(c = lookup_cell(cell_idx)))
return -1;

if (__sync_bool_compare_and_swap(&c->in_use, 0, 1))
if (__sync_bool_compare_and_swap(&c->in_use, 0, 1)) {
WRITE_ONCE(c->vtime_now, 0);
return cell_idx;
}
}
scx_bpf_error("No available cells to allocate");
return -1;
Expand Down Expand Up @@ -1130,6 +1132,10 @@ void BPF_STRUCT_OPS(mitosis_stopping, struct task_struct *p, bool runnable)
used = now - tctx->started_running_at;
tctx->started_running_at = now;
/* scale the execution time by the inverse of the weight and charge */
if (p->scx.weight == 0) {
scx_bpf_error("Task %d has zero weight", p->pid);
return;
}
p->scx.dsq_vtime += used * 100 / p->scx.weight;

if (cidx != 0 || tctx->all_cell_cpus_allowed) {
Expand Down Expand Up @@ -1210,11 +1216,13 @@ s32 BPF_STRUCT_OPS(mitosis_cgroup_exit, struct cgroup *cgrp)

record_cgroup_exit(cgrp->kn->id);

if (!(cgc = bpf_cgrp_storage_get(&cgrp_ctxs, cgrp, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE))) {
scx_bpf_error("cgrp_ctx creation failed for cgid %llu",
cgrp->kn->id);
return -ENOENT;
/*
* Use lookup without CREATE since this is the exit path. If the cgroup
* doesn't have storage, it's not a cell owner anyway.
*/
if (!(cgc = lookup_cgrp_ctx(cgrp))) {
/* Errors above on failure, verifier. */
return 0;
}

if (cgc->cell_owner) {
Expand Down
88 changes: 54 additions & 34 deletions scheds/rust/scx_mitosis/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ pub mod bpf_intf;
mod stats;

use std::cmp::max;
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::fmt;
use std::fmt::Display;
use std::mem::MaybeUninit;
Expand Down Expand Up @@ -145,10 +145,14 @@ impl Display for DistributionStats {
// given logging interval, the global and cell queueing decision counts print at the same width.
// Second, it reduces variance in column width between logging intervals. 5 is simply a heuristic.
const MIN_DECISIONS_WIDTH: usize = 5;
let descisions_width = max(
MIN_DECISIONS_WIDTH,
(self.global_queue_decisions as f64).log10().ceil() as usize,
);
let descisions_width = if self.global_queue_decisions > 0 {
max(
MIN_DECISIONS_WIDTH,
(self.global_queue_decisions as f64).log10().ceil() as usize,
)
} else {
MIN_DECISIONS_WIDTH
};
write!(
f,
"{:width$} {:5.1}% | Local:{:4.1}% From: CPU:{:4.1}% Cell:{:4.1}% | V:{:4.1}%",
Expand Down Expand Up @@ -441,15 +445,17 @@ impl<'a> Scheduler<'a> {

fn refresh_bpf_cells(&mut self) -> Result<()> {
let applied_configuration = unsafe {
std::ptr::read_volatile(
&self
.skel
.maps
.bss_data
.as_ref()
.unwrap()
.applied_configuration_seq as *const u32,
)
let ptr = &self
.skel
.maps
.bss_data
.as_ref()
.unwrap()
.applied_configuration_seq as *const u32;
(ptr as *const std::sync::atomic::AtomicU32)
.as_ref()
.unwrap()
.load(std::sync::atomic::Ordering::Acquire)
};
if self
.last_configuration_seq
Expand All @@ -471,28 +477,35 @@ impl<'a> Scheduler<'a> {
// Create cells we don't have yet, drop cells that are no longer in use.
// If we continue to drop cell metrics once a cell is removed, we'll need to make sure we
// flush metrics for a cell before we remove it completely.
let cells = &self.skel.maps.bss_data.as_ref().unwrap().cells;
for i in 0..MAX_CELLS {
let cell_idx = i as u32;
let bpf_cell = cells[i];
let in_use = unsafe { std::ptr::read_volatile(&bpf_cell.in_use as *const u32) };
if in_use > 0 {
self.cells
.entry(cell_idx)
.or_insert_with(|| Cell {
cpus: Cpumask::new(),
})
.cpus = cell_to_cpus
.get(&cell_idx)
.expect("missing cell in cpu map")
.clone();
self.metrics.cells.insert(cell_idx, CellMetrics::default());
} else {
self.cells.remove(&cell_idx);
self.metrics.cells.remove(&cell_idx);
}
//
// IMPORTANT: We determine which cells exist based on CPU assignments (which are
// synchronized by applied_configuration_seq), NOT by reading the in_use field
// separately. This avoids a TOCTOU race where a cell's in_use is set before
// CPUs are assigned.

// Cell 0 (root cell) always exists even if it has no CPUs temporarily
let cells_with_cpus: HashSet<u32> = cell_to_cpus.keys().copied().collect();
let mut active_cells = cells_with_cpus.clone();
active_cells.insert(0);

for cell_idx in &active_cells {
let cpus = cell_to_cpus
.get(cell_idx)
.cloned()
.unwrap_or_else(|| Cpumask::new());
self.cells
.entry(*cell_idx)
.or_insert_with(|| Cell {
cpus: Cpumask::new(),
})
.cpus = cpus;
self.metrics.cells.insert(*cell_idx, CellMetrics::default());
}

// Remove cells that no longer have CPUs assigned
self.cells.retain(|&k, _| active_cells.contains(&k));
self.metrics.cells.retain(|&k, _| active_cells.contains(&k));

self.last_configuration_seq = Some(applied_configuration);

Ok(())
Expand All @@ -507,6 +520,13 @@ fn read_cpu_ctxs(skel: &BpfSkel) -> Result<Vec<bpf_intf::cpu_ctx>> {
.lookup_percpu(&0u32.to_ne_bytes(), libbpf_rs::MapFlags::ANY)
.context("Failed to lookup cpu_ctx")?
.unwrap();
if cpu_ctxs_vec.len() < *NR_CPUS_POSSIBLE {
bail!(
"Percpu map returned {} entries but expected {}",
cpu_ctxs_vec.len(),
*NR_CPUS_POSSIBLE
);
}
for cpu in 0..*NR_CPUS_POSSIBLE {
cpu_ctxs.push(*unsafe {
&*(cpu_ctxs_vec[cpu].as_slice().as_ptr() as *const bpf_intf::cpu_ctx)
Expand Down
105 changes: 105 additions & 0 deletions scheds/rust/scx_mitosis/test/cleanup_test_cgroups.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#!/bin/bash
# Cleanup all test cgroups created by test scripts
#
# Scans the cgroup v2 root (/sys/fs/cgroup) for directories matching known
# test-name patterns, kills any processes still inside them (children first,
# deepest paths before parents), then removes the cgroup directories.
# Must be run as root: writing cgroup.procs / rmdir on cgroupfs needs it.

RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

if [ "$EUID" -ne 0 ]; then
    echo -e "${RED}Must run as root${NC}"
    exit 1
fi

echo -e "${YELLOW}========================================${NC}"
echo -e "${YELLOW}Cleaning up all test cgroups${NC}"
echo -e "${YELLOW}========================================${NC}\n"

# Find all test-related cgroups with pattern matching.
# NOTE: "test_*" subsumes most of the earlier entries; they are kept for
# explicitness, and duplicates are removed below with sort -u.
TEST_PATTERNS=(
    "test_mitosis*"
    "test_cell*"
    "test_simple*"
    "test_brutal*"
    "test_absolute*"
    "test_working*"
    "test_reuse*"
    "test_verify*"
    "test_cycle*"
    "test_16*"
    "test_15*"
    "test_*"
    "scx_mitosis_test*"
)

CLEANED=0
FAILED=0

# First pass: Find all matching cgroups at root level
ALL_TEST_CGROUPS=()
for pattern in "${TEST_PATTERNS[@]}"; do
    shopt -s nullglob # Make glob return nothing if no matches
    for cg in /sys/fs/cgroup/${pattern}; do
        if [ -d "$cg" ]; then
            ALL_TEST_CGROUPS+=("$cg")
        fi
    done
    shopt -u nullglob
done

# Remove duplicates and sort. (Assumes cgroup names contain no whitespace,
# which holds for all names created by the test scripts above.)
UNIQUE_CGROUPS=($(printf '%s\n' "${ALL_TEST_CGROUPS[@]}" | sort -u))

if [ ${#UNIQUE_CGROUPS[@]} -eq 0 ]; then
    echo -e "${GREEN}No test cgroups found to clean up${NC}"
    exit 0
fi

echo -e "${YELLOW}Found ${#UNIQUE_CGROUPS[@]} test cgroups to clean up${NC}\n"

# Clean each cgroup
for test_root in "${UNIQUE_CGROUPS[@]}"; do
    if [ ! -d "$test_root" ]; then
        continue
    fi

    # Quote the basename argument so paths with globs/spaces can't word-split.
    echo -e "${YELLOW}Cleaning $(basename "$test_root")...${NC}"

    # Find all child cgroups, deepest first, so rmdir never hits a
    # non-empty parent before its children are gone.
    if [ -d "$test_root" ]; then
        find "$test_root" -mindepth 1 -type d 2>/dev/null | sort -r | while read -r cg; do
            # Kill all processes in the cgroup
            if [ -f "$cg/cgroup.procs" ]; then
                cat "$cg/cgroup.procs" 2>/dev/null | xargs -r kill -9 2>/dev/null || true
            fi
            # Give the kernel a moment to reap before rmdir.
            sleep 0.05
            # Remove the cgroup
            rmdir "$cg" 2>/dev/null || true
        done
    fi

    # Remove the root test cgroup
    if [ -f "$test_root/cgroup.procs" ]; then
        cat "$test_root/cgroup.procs" 2>/dev/null | xargs -r kill -9 2>/dev/null || true
    fi
    sleep 0.1
    rmdir "$test_root" 2>/dev/null || true

    if [ -d "$test_root" ]; then
        echo -e "${RED}  ✗ Failed to remove $(basename "$test_root")${NC}"
        FAILED=$((FAILED + 1))
    else
        echo -e "${GREEN}  ✓ Removed $(basename "$test_root")${NC}"
        CLEANED=$((CLEANED + 1))
    fi
done

echo -e "\n${YELLOW}========================================${NC}"
echo -e "${GREEN}Cleaned: $CLEANED cgroups${NC}"
if [ $FAILED -gt 0 ]; then
    echo -e "${RED}Failed: $FAILED cgroups${NC}"
    echo -e "${YELLOW}Try running the cleanup again if some cgroups are still active${NC}"
else
    echo -e "${GREEN}All test cgroups removed successfully!${NC}"
fi
echo -e "${YELLOW}========================================${NC}"
Loading