Skip to content

Commit

Permalink
Support process.scheduler
Browse files Browse the repository at this point in the history
Spec: opencontainers/runtime-spec#1188
Fix: opencontainers#3895

Co-authored-by: utam0k <k0ma@utam0k.jp>
Signed-off-by: utam0k <k0ma@utam0k.jp>
Signed-off-by: lifubang <lifubang@acmcoder.com>
  • Loading branch information
utam0k authored and lifubang committed Oct 4, 2023
1 parent 634280f commit cf74bfc
Show file tree
Hide file tree
Showing 11 changed files with 208 additions and 1 deletion.
1 change: 0 additions & 1 deletion docs/spec-conformance.md
Expand Up @@ -12,7 +12,6 @@ v1.0.0 | `SCMP_ARCH_PARISC64` | Unplanned, due to lack
v1.0.2 | `.linux.personality` | [#3126](https://github.com/opencontainers/runc/pull/3126)
v1.1.0 | `SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV` | [#3862](https://github.com/opencontainers/runc/pull/3862)
v1.1.0 | rsvd hugetlb cgroup | TODO ([#3859](https://github.com/opencontainers/runc/issues/3859))
v1.1.0 | `.process.scheduler` | TODO ([#3895](https://github.com/opencontainers/runc/issues/3895))
v1.1.0 | `.process.ioPriority` | [#3783](https://github.com/opencontainers/runc/pull/3783)


Expand Down
63 changes: 63 additions & 0 deletions libcontainer/configs/config.go
Expand Up @@ -8,6 +8,7 @@ import (
"time"

"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"

"github.com/opencontainers/runc/libcontainer/devices"
"github.com/opencontainers/runtime-spec/specs-go"
Expand Down Expand Up @@ -219,6 +220,68 @@ type Config struct {

// TimeOffsets specifies the offset for supporting time namespaces.
TimeOffsets map[string]specs.LinuxTimeOffset `json:"time_offsets,omitempty"`

// Scheduler represents the scheduling attributes for a process.
Scheduler *Scheduler `json:"scheduler,omitempty"`
}

// Scheduler holds the scheduling attributes for a process. It is an alias of
// specs.Scheduler and is based on the Linux sched_setattr(2) syscall.
type Scheduler = specs.Scheduler

// ToSchedAttr converts a *configs.Scheduler into the *unix.SchedAttr that is
// passed to sched_setattr(2).
//
// The policy name and each flag name are translated to numeric values; Nice,
// Runtime, Deadline and Period are copied through unchanged.
//
// An error is returned when scheduler is nil, when the policy or any flag is
// not one of the values handled below, or when Priority is negative (a
// negative value cannot be represented in the unsigned Priority field and
// would otherwise silently wrap around to a very large number).
func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) {
	if scheduler == nil {
		return nil, fmt.Errorf("scheduler is not set")
	}

	// Numeric values correspond to the SCHED_* policy constants of the
	// Linux UAPI.
	var policy uint32
	switch scheduler.Policy {
	case specs.SchedOther:
		policy = 0
	case specs.SchedFIFO:
		policy = 1
	case specs.SchedRR:
		policy = 2
	case specs.SchedBatch:
		policy = 3
	case specs.SchedISO:
		policy = 4
	case specs.SchedIdle:
		policy = 5
	case specs.SchedDeadline:
		policy = 6
	default:
		return nil, fmt.Errorf("invalid scheduler policy: %s", scheduler.Policy)
	}

	// Each flag name maps to a single bit (SCHED_FLAG_* in the Linux UAPI);
	// the bits are OR-ed together into one mask.
	var flags uint64
	for _, flag := range scheduler.Flags {
		switch flag {
		case specs.SchedFlagResetOnFork:
			flags |= 0x01
		case specs.SchedFlagReclaim:
			flags |= 0x02
		case specs.SchedFlagDLOverrun:
			flags |= 0x04
		case specs.SchedFlagKeepPolicy:
			flags |= 0x08
		case specs.SchedFlagKeepParams:
			flags |= 0x10
		case specs.SchedFlagUtilClampMin:
			flags |= 0x20
		case specs.SchedFlagUtilClampMax:
			flags |= 0x40
		default:
			return nil, fmt.Errorf("invalid scheduler flag: %s", flag)
		}
	}

	// Reject negative priorities explicitly instead of letting the uint32
	// conversion below wrap them around.
	if scheduler.Priority < 0 {
		return nil, fmt.Errorf("invalid scheduler priority: %d", scheduler.Priority)
	}

	return &unix.SchedAttr{
		Size:     unix.SizeofSchedAttr,
		Policy:   policy,
		Flags:    flags,
		Nice:     scheduler.Nice,
		Priority: uint32(scheduler.Priority),
		Runtime:  scheduler.Runtime,
		Deadline: scheduler.Deadline,
		Period:   scheduler.Period,
	}, nil
}

type (
Expand Down
23 changes: 23 additions & 0 deletions libcontainer/configs/validate/validator.go
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runtime-spec/specs-go"
selinux "github.com/opencontainers/selinux/go-selinux"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
Expand All @@ -30,6 +31,7 @@ func Validate(config *configs.Config) error {
intelrdtCheck,
rootlessEUIDCheck,
mountsStrict,
scheduler,
}
for _, c := range checks {
if err := c(config); err != nil {
Expand Down Expand Up @@ -353,3 +355,24 @@ func isHostNetNS(path string) (bool, error) {

return (st1.Dev == st2.Dev) && (st1.Ino == st2.Ino), nil
}

// scheduler validates config.Scheduler against the rules described in
// https://man7.org/linux/man-pages/man2/sched_setattr.2.html
//
// A nil Scheduler is accepted. Otherwise the policy must be set, nice must be
// within [-20, 19], a non-zero priority is only accepted for SchedFIFO and
// SchedRR, and non-zero runtime/deadline/period values are only accepted for
// SchedDeadline.
func scheduler(config *configs.Config) error {
	sched := config.Scheduler
	if sched == nil {
		return nil
	}

	realtime := sched.Policy == specs.SchedFIFO || sched.Policy == specs.SchedRR
	hasDeadlineParams := sched.Runtime != 0 || sched.Deadline != 0 || sched.Period != 0

	// Cases are evaluated top to bottom, so the first violated rule wins.
	switch {
	case sched.Policy == "":
		return errors.New("scheduler policy is required")
	case sched.Nice < -20 || sched.Nice > 19:
		return fmt.Errorf("invalid scheduler.nice: %d", sched.Nice)
	case sched.Priority != 0 && !realtime:
		return errors.New("scheduler.priority can only be specified for SchedFIFO or SchedRR policy")
	case hasDeadlineParams && sched.Policy != specs.SchedDeadline:
		return errors.New("scheduler runtime/deadline/period can only be specified for SchedDeadline policy")
	}
	return nil
}
50 changes: 50 additions & 0 deletions libcontainer/configs/validate/validator_test.go
Expand Up @@ -616,3 +616,53 @@ func TestValidateIDMapMounts(t *testing.T) {
})
}
}

// TestValidateScheduler feeds a table of scheduler settings through Validate
// and checks, for every row, whether validation fails or succeeds as expected.
func TestValidateScheduler(t *testing.T) {
	testCases := []struct {
		isErr     bool
		policy    string
		niceValue int32
		priority  int32
		runtime   uint64
		deadline  uint64
		period    uint64
	}{
		// Missing policy.
		{isErr: true, niceValue: 0},
		// Nice boundaries.
		{isErr: false, policy: "SCHED_OTHER", niceValue: 19},
		{isErr: false, policy: "SCHED_OTHER", niceValue: -20},
		{isErr: true, policy: "SCHED_OTHER", niceValue: 20},
		{isErr: true, policy: "SCHED_OTHER", niceValue: -21},
		// Priority is only accepted for SCHED_FIFO / SCHED_RR.
		{isErr: true, policy: "SCHED_OTHER", priority: 100},
		{isErr: false, policy: "SCHED_FIFO", priority: 100},
		// Runtime/deadline/period are only accepted for SCHED_DEADLINE.
		{isErr: true, policy: "SCHED_FIFO", runtime: 20},
		{isErr: true, policy: "SCHED_BATCH", deadline: 30},
		{isErr: true, policy: "SCHED_IDLE", period: 40},
		{isErr: true, policy: "SCHED_DEADLINE", priority: 100},
		{isErr: false, policy: "SCHED_DEADLINE", runtime: 200},
		{isErr: false, policy: "SCHED_DEADLINE", deadline: 300},
		{isErr: false, policy: "SCHED_DEADLINE", period: 400},
	}

	for _, tc := range testCases {
		// Named sched (not scheduler) so the package-level validator
		// function of that name is not shadowed.
		sched := configs.Scheduler{
			Policy:   specs.LinuxSchedulerPolicy(tc.policy),
			Nice:     tc.niceValue,
			Priority: tc.priority,
			Runtime:  tc.runtime,
			Deadline: tc.deadline,
			Period:   tc.period,
		}
		config := &configs.Config{
			Rootfs:    "/var",
			Scheduler: &sched,
		}

		err := Validate(config)
		// Print the whole row on failure: the nice value alone is 0 for
		// most rows and does not identify which case failed.
		if tc.isErr && err == nil {
			t.Errorf("scheduler %+v: expected error, got nil", tc)
		}
		if !tc.isErr && err != nil {
			t.Errorf("scheduler %+v: expected nil, got error %v", tc, err)
		}
	}
}
14 changes: 14 additions & 0 deletions libcontainer/init_linux.go
Expand Up @@ -640,6 +640,20 @@ func setupRlimits(limits []configs.Rlimit, pid int) error {
return nil
}

// setupScheduler applies config.Scheduler to the calling thread (pid 0) via
// sched_setattr(2).
//
// It is a no-op when no scheduler is configured, so callers do not have to
// guard the call themselves. Conversion errors from configs.ToSchedAttr are
// returned as-is; a failure of the syscall is wrapped, except for EPERM with a
// cpuset configured, which gets a dedicated, more actionable message.
func setupScheduler(config *configs.Config) error {
	if config.Scheduler == nil {
		return nil
	}
	attr, err := configs.ToSchedAttr(config.Scheduler)
	if err != nil {
		return err
	}
	if err := unix.SchedSetAttr(0, attr, 0); err != nil {
		// Only consult the cgroup config when it is present, so that
		// building the diagnostic can never panic on the error path.
		if errors.Is(err, unix.EPERM) && config.Cgroups != nil && config.Cgroups.CpusetCpus != "" {
			return errors.New("process scheduler can't be used together with AllowedCPUs")
		}
		return fmt.Errorf("error setting scheduler: %w", err)
	}
	return nil
}

// signalAllProcesses freezes then iterates over all the processes inside the
// manager's cgroups sending the signal s to them.
func signalAllProcesses(m cgroups.Manager, s unix.Signal) error {
Expand Down
2 changes: 2 additions & 0 deletions libcontainer/process.go
Expand Up @@ -95,6 +95,8 @@ type Process struct {
//
// For cgroup v2, the only key allowed is "".
SubCgroupPaths map[string]string

Scheduler *configs.Scheduler
}

// Wait waits for the process to exit.
Expand Down
6 changes: 6 additions & 0 deletions libcontainer/setns_init_linux.go
Expand Up @@ -65,6 +65,12 @@ func (l *linuxSetnsInit) Init() error {
unix.Umask(int(*l.config.Config.Umask))
}

if l.config.Config.Scheduler != nil {
if err := setupScheduler(l.config.Config); err != nil {
return err
}
}

if err := selinux.SetExecLabel(l.config.ProcessLabel); err != nil {
return err
}
Expand Down
4 changes: 4 additions & 0 deletions libcontainer/specconv/spec_linux.go
Expand Up @@ -494,6 +494,10 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
Ambient: spec.Process.Capabilities.Ambient,
}
}
if spec.Process.Scheduler != nil {
s := *spec.Process.Scheduler
config.Scheduler = &s
}
}
createHooks(spec, config)
config.Version = specs.Version
Expand Down
7 changes: 7 additions & 0 deletions libcontainer/standard_init_linux.go
Expand Up @@ -159,6 +159,13 @@ func (l *linuxStandardInit) Init() error {
return &os.SyscallError{Syscall: "prctl(SET_NO_NEW_PRIVS)", Err: err}
}
}

if l.config.Config.Scheduler != nil {
if err := setupScheduler(l.config.Config); err != nil {
return err
}
}

// Tell our parent that we're ready to Execv. This must be done before the
// Seccomp rules have been applied, because we need to be able to read and
// write to a socket.
Expand Down
34 changes: 34 additions & 0 deletions tests/integration/scheduler.bats
@@ -0,0 +1,34 @@
#!/usr/bin/env bats

load helpers

# Per-test setup hook: calls the `requires root` and `setup_debian` helpers
# (loaded via `load helpers`).
# NOTE(review): presumably this skips the test unless run as root and prepares
# a Debian-based bundle (the tests below run `chrt` in the container) — confirm
# against helpers.bash.
function setup() {
requires root
setup_debian
}

# Per-test teardown hook: delegates cleanup to the teardown_bundle helper.
function teardown() {
teardown_bundle
}

# Checks that .process.scheduler from the container config is applied to the
# container's PID 1.
@test "scheduler is applied" {
# Request SCHED_DEADLINE with runtime/deadline/period = 42000/1000000/1000000.
update_config ' .process.scheduler = {"policy": "SCHED_DEADLINE", "nice": 19, "priority": 0, "runtime": 42000, "deadline": 1000000, "period": 1000000, }'

# The container must start successfully with these settings.
runc run -d --console-socket "$CONSOLE_SOCKET" test_scheduler
[ "$status" -eq 0 ]

# Query the scheduling attributes of PID 1 from inside the container.
runc exec test_scheduler chrt -p 1
[ "$status" -eq 0 ]

# The first three output lines must report the configured policy, priority and
# deadline parameters.
[[ "${lines[0]}" == *"scheduling policy: SCHED_DEADLINE" ]]
[[ "${lines[1]}" == *"priority: 0" ]]
[[ "${lines[2]}" == *"runtime/deadline/period parameters: 42000/1000000/1000000" ]]
}

# Checks that combining a cpuset restriction (.linux.resources.cpu.cpus) with a
# SCHED_DEADLINE scheduler makes `runc run` fail.
@test "scheduler vs cpus" {
update_config ' .linux.resources.cpu.cpus = "0"
| .process.scheduler = {"policy": "SCHED_DEADLINE", "nice": 19, "runtime": 42000, "deadline": 1000000, "period": 1000000, }'

# Starting the container is expected to fail with exit status 1.
runc run -d --console-socket "$CONSOLE_SOCKET" test_scheduler
[ "$status" -eq 1 ]
}
5 changes: 5 additions & 0 deletions utils_linux.go
Expand Up @@ -61,6 +61,11 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) {
lp.ConsoleHeight = uint16(p.ConsoleSize.Height)
}

if p.Scheduler != nil {
s := *p.Scheduler
lp.Scheduler = &s
}

if p.Capabilities != nil {
lp.Capabilities = &configs.Capabilities{}
lp.Capabilities.Bounding = p.Capabilities.Bounding
Expand Down

0 comments on commit cf74bfc

Please sign in to comment.