Skip to content

Commit

Permalink
Support process.scheduler
Browse files Browse the repository at this point in the history
Spec: opencontainers/runtime-spec#1188
Fix: opencontainers#3895

Signed-off-by: utam0k <k0ma@utam0k.jp>
Signed-off-by: lifubang <lifubang@acmcoder.com>
  • Loading branch information
utam0k authored and lifubang committed Sep 25, 2023
1 parent f235fa6 commit 109dce2
Show file tree
Hide file tree
Showing 13 changed files with 221 additions and 2 deletions.
1 change: 0 additions & 1 deletion docs/spec-conformance.md
Expand Up @@ -13,7 +13,6 @@ v1.0.2 | `.linux.personality` | [#3126](https://github
v1.1.0 | `SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV` | [#3862](https://github.com/opencontainers/runc/pull/3862)
v1.1.0 | time namespaces | [#3876](https://github.com/opencontainers/runc/pull/3876)
v1.1.0 | rsvd hugetlb cgroup | TODO ([#3859](https://github.com/opencontainers/runc/issues/3859))
v1.1.0 | `.process.scheduler` | TODO ([#3895](https://github.com/opencontainers/runc/issues/3895))
v1.1.0 | `.process.ioPriority` | [#3783](https://github.com/opencontainers/runc/pull/3783)


Expand Down
6 changes: 6 additions & 0 deletions libcontainer/configs/config.go
Expand Up @@ -219,8 +219,14 @@ type Config struct {

// TimeOffsets specifies the offset for supporting time namespaces.
TimeOffsets map[string]specs.LinuxTimeOffset `json:"time_offsets,omitempty"`

// Scheduler represents the scheduling attributes for a process.
Scheduler *Scheduler `json:"scheduler,omitempty"`
}

// Scheduler describes the scheduling attributes applied to a container
// process. It is a type alias of specs.Scheduler from the OCI runtime-spec,
// so the two types are interchangeable without conversion. The layout is
// based on the Linux sched_setattr(2) syscall.
type Scheduler = specs.Scheduler

type (
HookName string
HookList []Hook
Expand Down
30 changes: 30 additions & 0 deletions libcontainer/configs/validate/validator.go
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runtime-spec/specs-go"
selinux "github.com/opencontainers/selinux/go-selinux"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
Expand All @@ -30,6 +31,7 @@ func Validate(config *configs.Config) error {
intelrdtCheck,
rootlessEUIDCheck,
mountsStrict,
scheduler,
}
for _, c := range checks {
if err := c(config); err != nil {
Expand Down Expand Up @@ -353,3 +355,31 @@ func isHostNetNS(path string) (bool, error) {

return (st1.Dev == st2.Dev) && (st1.Ino == st2.Ino), nil
}

// scheduler validates the optional scheduler section of a container config,
// following https://man7.org/linux/man-pages/man2/sched_setattr.2.html.
//
// The checks, in order:
//   - nice must lie within [-20, 19];
//   - a non-zero priority is accepted only for SCHED_FIFO and SCHED_RR;
//   - non-zero runtime, deadline and period are accepted only for
//     SCHED_DEADLINE.
//
// A config without a scheduler section is always valid. The first violated
// rule is returned as an error.
func scheduler(config *configs.Config) error {
	s := config.Scheduler
	if s == nil {
		return nil
	}

	if s.Nice < -20 || s.Nice > 19 {
		return fmt.Errorf("invalid scheduler.nice: %d", s.Nice)
	}

	switch s.Policy {
	case specs.SchedFIFO, specs.SchedRR:
		// These two policies may carry a priority; nothing to reject here.
	default:
		if s.Priority != 0 {
			return fmt.Errorf("invalid scheduler.priority: %d", s.Priority)
		}
	}

	// Only the deadline policy may set runtime/deadline/period.
	if s.Policy == specs.SchedDeadline {
		return nil
	}
	switch {
	case s.Runtime != 0:
		return fmt.Errorf("invalid scheduler.runtime: %d", s.Runtime)
	case s.Deadline != 0:
		return fmt.Errorf("invalid scheduler.deadline: %d", s.Deadline)
	case s.Period != 0:
		return fmt.Errorf("invalid scheduler.period: %d", s.Period)
	}
	return nil
}
49 changes: 49 additions & 0 deletions libcontainer/configs/validate/validator_test.go
Expand Up @@ -616,3 +616,52 @@ func TestValidateIDMapMounts(t *testing.T) {
})
}
}

// TestValidateScheduler runs Validate against a table of scheduler settings
// and checks that each combination is accepted or rejected as expected.
//
// Cases leave unspecified fields at their zero value, so an entry such as
// {isErr: true, priority: 100} exercises an empty policy with a priority.
func TestValidateScheduler(t *testing.T) {
	testCases := []struct {
		isErr     bool
		policy    string
		niceValue int32
		priority  uint32
		runtime   uint64
		deadline  uint64
		period    uint64
	}{
		{isErr: false, niceValue: 19},
		{isErr: false, niceValue: -20},
		{isErr: true, niceValue: 20},
		{isErr: true, niceValue: -21},
		{isErr: true, priority: 100},
		{isErr: false, policy: "SCHED_FIFO", priority: 100},
		{isErr: true, policy: "SCHED_FIFO", runtime: 20},
		{isErr: true, policy: "SCHED_BATCH", deadline: 30},
		{isErr: true, policy: "SCHED_IDLE", period: 40},
		{isErr: true, policy: "SCHED_DEADLINE", priority: 100},
		{isErr: false, policy: "SCHED_DEADLINE", runtime: 200},
		{isErr: false, policy: "SCHED_DEADLINE", deadline: 300},
		{isErr: false, policy: "SCHED_DEADLINE", period: 400},
	}

	for _, tc := range testCases {
		sched := configs.Scheduler{
			Policy:   specs.LinuxSchedulerPolicy(tc.policy),
			Nice:     tc.niceValue,
			Priority: tc.priority,
			Runtime:  tc.runtime,
			Deadline: tc.deadline,
			Period:   tc.period,
		}
		config := &configs.Config{
			Rootfs:    "/var",
			Scheduler: &sched,
		}

		err := Validate(config)
		// Report the whole test case: most cases share the same nice value
		// (zero), so printing only that field cannot identify the failure.
		if tc.isErr && err == nil {
			t.Errorf("scheduler %+v: expected error, got nil", tc)
		}
		if !tc.isErr && err != nil {
			t.Errorf("scheduler %+v: expected nil, got error %v", tc, err)
		}
	}
}
2 changes: 2 additions & 0 deletions libcontainer/process.go
Expand Up @@ -95,6 +95,8 @@ type Process struct {
//
// For cgroup v2, the only key allowed is "".
SubCgroupPaths map[string]string

Scheduler *configs.Scheduler
}

// Wait waits for the process to exit.
Expand Down
1 change: 1 addition & 0 deletions libcontainer/process_linux.go
Expand Up @@ -81,6 +81,7 @@ func (p *setnsProcess) signal(sig os.Signal) error {

func (p *setnsProcess) start() (retErr error) {
defer p.messageSockPair.parent.Close()

// get the "before" value of oom kill count
oom, _ := p.manager.OOMKillCount()
err := p.cmd.Start()
Expand Down
10 changes: 10 additions & 0 deletions libcontainer/setns_init_linux.go
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/opencontainers/runc/libcontainer/keys"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
)

// linuxSetnsInit performs the container's initialization for running a new process
Expand Down Expand Up @@ -65,6 +66,15 @@ func (l *linuxSetnsInit) Init() error {
unix.Umask(int(*l.config.Config.Umask))
}

if l.config.Config.Scheduler != nil {
if err := unix.SchedSetAttr(0, utils.ToSchedAttr(l.config.Config.Scheduler), 0); err != nil {
if errors.Is(err, unix.EPERM) {
return fmt.Errorf("error setting scheduler(please check you have appropriate privileges or the cpus config): %w", err)
}
return fmt.Errorf("error setting scheduler: %w", err)
}
}

if err := selinux.SetExecLabel(l.config.ProcessLabel); err != nil {
return err
}
Expand Down
11 changes: 11 additions & 0 deletions libcontainer/specconv/spec_linux.go
Expand Up @@ -494,6 +494,17 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
Ambient: spec.Process.Capabilities.Ambient,
}
}
if spec.Process.Scheduler != nil {
config.Scheduler = &configs.Scheduler{
Policy: spec.Process.Scheduler.Policy,
Nice: spec.Process.Scheduler.Nice,
Priority: spec.Process.Scheduler.Priority,
Flags: spec.Process.Scheduler.Flags,
Runtime: spec.Process.Scheduler.Runtime,
Deadline: spec.Process.Scheduler.Deadline,
Period: spec.Process.Scheduler.Period,
}
}
}
createHooks(spec, config)
config.Version = specs.Version
Expand Down
11 changes: 11 additions & 0 deletions libcontainer/standard_init_linux.go
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/opencontainers/runc/libcontainer/keys"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
)

type linuxStandardInit struct {
Expand Down Expand Up @@ -159,6 +160,16 @@ func (l *linuxStandardInit) Init() error {
return &os.SyscallError{Syscall: "prctl(SET_NO_NEW_PRIVS)", Err: err}
}
}

if l.config.Config.Scheduler != nil {
if err := unix.SchedSetAttr(0, utils.ToSchedAttr(l.config.Config.Scheduler), 0); err != nil {
if errors.Is(err, unix.EPERM) {
return fmt.Errorf("error setting scheduler(please check you have appropriate privileges or the cpus config): %w", err)
}
return fmt.Errorf("error setting scheduler: %w", err)
}
}

// Tell our parent that we're ready to Execv. This must be done before the
// Seccomp rules have been applied, because we need to be able to read and
// write to a socket.
Expand Down
54 changes: 54 additions & 0 deletions libcontainer/utils/utils_unix.go
Expand Up @@ -10,6 +10,8 @@ import (
"strconv"
"sync"

"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
)

Expand Down Expand Up @@ -98,3 +100,55 @@ func NewSockPair(name string) (parent, child *os.File, err error) {
}
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
}

// ToSchedAttr builds the *unix.SchedAttr that corresponds to the given
// *configs.Scheduler.
//
// Nice, Priority, Runtime, Deadline and Period are copied verbatim. The
// symbolic policy name and the list of flag names are translated to their
// numeric forms (presumably mirroring the kernel's SCHED_* and SCHED_FLAG_*
// values, see sched_setattr(2)). A policy that is not recognised leaves
// Policy at zero, the same value used for specs.SchedOther, and an
// unrecognised flag contributes no bits.
//
// scheduler must not be nil.
func ToSchedAttr(scheduler *configs.Scheduler) *unix.SchedAttr {
	attr := &unix.SchedAttr{
		Size:     unix.SizeofSchedAttr,
		Nice:     scheduler.Nice,
		Priority: scheduler.Priority,
		Runtime:  scheduler.Runtime,
		Deadline: scheduler.Deadline,
		Period:   scheduler.Period,
	}

	// specs.SchedOther maps to 0, which attr.Policy already holds.
	switch scheduler.Policy {
	case specs.SchedFIFO:
		attr.Policy = 1
	case specs.SchedRR:
		attr.Policy = 2
	case specs.SchedBatch:
		attr.Policy = 3
	case specs.SchedISO:
		attr.Policy = 4
	case specs.SchedIdle:
		attr.Policy = 5
	case specs.SchedDeadline:
		attr.Policy = 6
	}

	// Each known flag owns one bit; OR them together.
	for _, f := range scheduler.Flags {
		switch f {
		case specs.SchedFlagResetOnFork:
			attr.Flags |= 1 << 0
		case specs.SchedFlagReclaim:
			attr.Flags |= 1 << 1
		case specs.SchedFlagDLOverrun:
			attr.Flags |= 1 << 2
		case specs.SchedFlagKeepPolicy:
			attr.Flags |= 1 << 3
		case specs.SchedFlagKeepParams:
			attr.Flags |= 1 << 4
		case specs.SchedFlagUtilClampMin:
			attr.Flags |= 1 << 5
		case specs.SchedFlagUtilClampMax:
			attr.Flags |= 1 << 6
		}
	}

	return attr
}
34 changes: 34 additions & 0 deletions tests/integration/scheduler.bats
@@ -0,0 +1,34 @@
#!/usr/bin/env bats

load helpers

# setup is the bats per-test hook that prepares the environment.
# NOTE(review): `requires root` presumably skips the test when not running as
# root, and `setup_debian` presumably prepares a Debian-based bundle; both are
# provided by the loaded `helpers` file — confirm there.
function setup() {
requires root
setup_debian
}

# teardown is the bats per-test hook that cleans up afterwards.
# NOTE(review): `teardown_bundle` comes from the loaded `helpers` file and
# presumably removes the bundle created in setup — confirm there.
function teardown() {
teardown_bundle
}

# Starts a container configured with SCHED_DEADLINE and checks, from inside
# the container, that PID 1 reports the requested policy and parameters.
@test "scheduler is applied" {
# Set .process.scheduler in the bundle config (update_config presumably applies
# this jq filter to config.json — confirm in helpers).
update_config ' .process.scheduler = {"policy": "SCHED_DEADLINE", "nice": 19, "priority": 0, "runtime": 42000, "deadline": 1000000, "period": 1000000, }'

runc run -d --console-socket "$CONSOLE_SOCKET" test_scheduler
[ "$status" -eq 0 ]

# Query the scheduling attributes of PID 1 as seen inside the container.
runc exec test_scheduler chrt -p 1
[ "$status" -eq 0 ]

# The first three output lines must report the policy, priority and
# runtime/deadline/period values configured above.
[[ "${lines[0]}" == *"scheduling policy: SCHED_DEADLINE" ]]
[[ "${lines[1]}" == *"priority: 0" ]]
[[ "${lines[2]}" == *"runtime/deadline/period parameters: 42000/1000000/1000000" ]]
}

# Combines a cpuset restriction (cpus = "0") with SCHED_DEADLINE and expects
# `runc run` to fail with exit status 1.
# NOTE(review): the failure presumably comes from the kernel rejecting
# SCHED_DEADLINE for a task with restricted CPU affinity (see sched_setattr(2));
# the test itself only asserts the exit status.
@test "scheduler vs cpus" {
update_config ' .linux.resources.cpu.cpus = "0"
| .process.scheduler = {"policy": "SCHED_DEADLINE", "nice": 19, "runtime": 42000, "deadline": 1000000, "period": 1000000, }'

runc run -d --console-socket "$CONSOLE_SOCKET" test_scheduler
[ "$status" -eq 1 ]
}
12 changes: 12 additions & 0 deletions utils_linux.go
Expand Up @@ -61,6 +61,18 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) {
lp.ConsoleHeight = uint16(p.ConsoleSize.Height)
}

if p.Scheduler != nil {
lp.Scheduler = &configs.Scheduler{
Policy: p.Scheduler.Policy,
Nice: p.Scheduler.Nice,
Priority: p.Scheduler.Priority,
Flags: p.Scheduler.Flags,
Runtime: p.Scheduler.Runtime,
Deadline: p.Scheduler.Deadline,
Period: p.Scheduler.Period,
}
}

if p.Capabilities != nil {
lp.Capabilities = &configs.Capabilities{}
lp.Capabilities.Bounding = p.Capabilities.Bounding
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 109dce2

Please sign in to comment.