Skip to content

Commit

Permalink
fix kubernetes#51135: make the CFS quota period configurable; adds a CLI flag
Browse files Browse the repository at this point in the history
and config option to the kubelet to set cpu.cfs_period_us, defaulting to 100ms as before.

It requires enabling the feature gate CustomCPUCFSQuotaPeriod.

Signed-off-by: Sandor Szücs <sandor.szuecs@zalando.de>
  • Loading branch information
szuecs authored and JoelSpeed committed Dec 3, 2018
1 parent 753b2db commit ad3771b
Show file tree
Hide file tree
Showing 33 changed files with 1,235 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ KubeletConfiguration:
containerLogMaxSize: 10Mi
contentType: application/vnd.kubernetes.protobuf
cpuCFSQuota: true
cpuCFSQuotaPeriod: 0s
cpuManagerPolicy: none
cpuManagerReconcilePeriod: 10s
enableControllerAttachDetach: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ kubeletConfiguration:
containerLogMaxSize: 10Mi
contentType: application/vnd.kubernetes.protobuf
cpuCFSQuota: true
cpuCFSQuotaPeriod: 0s
cpuManagerPolicy: none
cpuManagerReconcilePeriod: 10s
enableControllerAttachDetach: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ kubeletConfiguration:
containerLogMaxSize: 10Mi
contentType: application/vnd.kubernetes.protobuf
cpuCFSQuota: true
cpuCFSQuotaPeriod: 0s
cpuManagerPolicy: none
cpuManagerReconcilePeriod: 10s
enableControllerAttachDetach: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ kubeletConfiguration:
containerLogMaxSize: 10Mi
contentType: application/vnd.kubernetes.protobuf
cpuCFSQuota: true
cpuCFSQuotaPeriod: 0s
cpuManagerPolicy: none
cpuManagerReconcilePeriod: 10s
enableControllerAttachDetach: true
Expand Down
1 change: 1 addition & 0 deletions cmd/kubelet/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig

fs.StringVar(&c.ResolverConfig, "resolv-conf", c.ResolverConfig, "Resolver configuration file used as the basis for the container DNS resolution configuration.")
fs.BoolVar(&c.CPUCFSQuota, "cpu-cfs-quota", c.CPUCFSQuota, "Enable CPU CFS quota enforcement for containers that specify CPU limits")
fs.DurationVar(&c.CPUCFSQuotaPeriod.Duration, "cpu-cfs-quota-period", c.CPUCFSQuotaPeriod.Duration, "Sets CPU CFS quota period value, cpu.cfs_period_us, defaults to Linux Kernel default")
fs.BoolVar(&c.EnableControllerAttachDetach, "enable-controller-attach-detach", c.EnableControllerAttachDetach, "Enables the Attach/Detach controller to manage attachment/detachment of volumes scheduled to this node, and disables kubelet from executing any attach/detach operations")
fs.BoolVar(&c.MakeIPTablesUtilChains, "make-iptables-util-chains", c.MakeIPTablesUtilChains, "If true, kubelet will ensure iptables utility rules are present on host.")
fs.Int32Var(&c.IPTablesMasqueradeBit, "iptables-masquerade-bit", c.IPTablesMasqueradeBit, "The bit of the fwmark space to mark packets for SNAT. Must be within the range [0, 31]. Please match this parameter with corresponding parameter in kube-proxy.")
Expand Down
1 change: 1 addition & 0 deletions cmd/kubelet/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,7 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.Dependencies, stopCh <-chan
ExperimentalCPUManagerReconcilePeriod: s.CPUManagerReconcilePeriod.Duration,
ExperimentalPodPidsLimit: s.PodPidsLimit,
EnforceCPULimits: s.CPUCFSQuota,
CPUCFSQuotaPeriod: s.CPUCFSQuotaPeriod.Duration,
},
s.FailSwapOn,
devicePluginEnabled,
Expand Down
7 changes: 7 additions & 0 deletions pkg/features/kube_features.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,12 @@ const (
// Alternative container-level CPU affinity policies.
CPUManager utilfeature.Feature = "CPUManager"

// owner: @szuecs
// alpha: v1.12
//
// Enable nodes to change CPUCFSQuotaPeriod
CPUCFSQuotaPeriod utilfeature.Feature = "CustomCPUCFSQuotaPeriod"

// owner: @derekwaynecarr
// beta: v1.10
//
Expand Down Expand Up @@ -376,6 +382,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS
ExpandInUsePersistentVolumes: {Default: false, PreRelease: utilfeature.Alpha},
AttachVolumeLimit: {Default: false, PreRelease: utilfeature.Alpha},
CPUManager: {Default: true, PreRelease: utilfeature.Beta},
CPUCFSQuotaPeriod: {Default: false, PreRelease: utilfeature.Alpha},
ServiceNodeExclusion: {Default: false, PreRelease: utilfeature.Alpha},
MountContainers: {Default: false, PreRelease: utilfeature.Alpha},
VolumeScheduling: {Default: true, PreRelease: utilfeature.Beta},
Expand Down
552 changes: 552 additions & 0 deletions pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go

Large diffs are not rendered by default.

49 changes: 49 additions & 0 deletions pkg/kubelet/apis/config/validation/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Bazel build rules for the Go package
# k8s.io/kubernetes/pkg/kubelet/apis/config/validation.

# Targets in this file are publicly visible unless they set their own
# visibility attribute.
package(default_visibility = ["//visibility:public"])

# Bring in the Go rules used by the targets below.
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)

# The validation library itself.
# NOTE(review): validation_others.go / validation_windows.go look like
# platform-specific variants selected by Go build constraints - confirm.
go_library(
name = "go_default_library",
srcs = [
"validation.go",
"validation_others.go",
"validation_windows.go",
],
importpath = "k8s.io/kubernetes/pkg/kubelet/apis/config/validation",
deps = [
"//pkg/features:go_default_library",
"//pkg/kubelet/apis/config:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/errors:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/validation:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
)

# Every file under this package directory (glob "**"); private to this package.
# NOTE(review): the "automanaged" tag suggests this target is maintained by
# tooling - confirm before editing by hand.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)

# Wrapper around :package-srcs. It sets no visibility of its own, so the
# package default (public) applies.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

# Unit tests for the validation library. The library is pulled in through
# `embed` rather than `deps`, so the test sources are built together with the
# library's sources.
go_test(
name = "go_default_test",
srcs = ["validation_test.go"],
embed = [":go_default_library"],
deps = [
"//pkg/kubelet/apis/config:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/errors:go_default_library",
],
)
1 change: 1 addition & 0 deletions pkg/kubelet/apis/kubeletconfig/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ var (
"Authorization.Webhook.CacheAuthorizedTTL.Duration",
"Authorization.Webhook.CacheUnauthorizedTTL.Duration",
"CPUCFSQuota",
"CPUCFSQuotaPeriod.Duration",
"CPUManagerPolicy",
"CPUManagerReconcilePeriod.Duration",
"QOSReserved[*]",
Expand Down
2 changes: 2 additions & 0 deletions pkg/kubelet/apis/kubeletconfig/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,8 @@ type KubeletConfiguration struct {
// cpuCFSQuota enables CPU CFS quota enforcement for containers that
// specify CPU limits
CPUCFSQuota bool
// cpuCFSQuotaPeriod sets the CPU CFS quota period value (cpu.cfs_period_us); defaults to 100ms.
CPUCFSQuotaPeriod metav1.Duration
// maxOpenFiles is Number of files that can be opened by Kubelet process.
MaxOpenFiles int64
// contentType is contentType of requests sent to apiserver.
Expand Down
3 changes: 2 additions & 1 deletion pkg/kubelet/apis/kubeletconfig/v1beta1/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ go_library(
],
importpath = "k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig/v1beta1",
deps = [
"//pkg/kubelet/apis/kubeletconfig:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubelet/apis/config:go_default_library",
"//pkg/kubelet/qos:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/master/ports:go_default_library",
Expand Down
4 changes: 4 additions & 0 deletions pkg/kubelet/apis/kubeletconfig/v1beta1/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kruntime "k8s.io/apimachinery/pkg/runtime"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/qos"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/master/ports"
Expand Down Expand Up @@ -151,6 +152,9 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
if obj.CPUCFSQuota == nil {
obj.CPUCFSQuota = utilpointer.BoolPtr(true)
}
if obj.CPUCFSQuotaPeriod == nil && obj.FeatureGates[string(features.CPUCFSQuotaPeriod)] {
obj.CPUCFSQuotaPeriod = &metav1.Duration{Duration: 100 * time.Millisecond}
}
if obj.MaxOpenFiles == 0 {
obj.MaxOpenFiles = 1000000
}
Expand Down
7 changes: 7 additions & 0 deletions pkg/kubelet/apis/kubeletconfig/v1beta1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,13 @@ type KubeletConfiguration struct {
// Default: true
// +optional
CPUCFSQuota *bool `json:"cpuCFSQuota,omitempty"`
// cpuCFSQuotaPeriod is the CPU CFS quota period value, cpu.cfs_period_us.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// limits set for containers will result in different cpu.cfs_quota settings. This
// will trigger container restarts on the node being reconfigured.
// Default: "100ms"
// +optional
CPUCFSQuotaPeriod *metav1.Duration `json:"cpuCFSQuotaPeriod,omitempty"`
// maxOpenFiles is Number of files that can be opened by Kubelet process.
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
// it may impact the ability of the Kubelet to interact with the node's filesystem.
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions pkg/kubelet/apis/kubeletconfig/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions pkg/kubelet/apis/kubeletconfig/validation/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package validation

import (
"fmt"
"time"

utilerrors "k8s.io/apimachinery/pkg/util/errors"
utilvalidation "k8s.io/apimachinery/pkg/util/validation"
Expand Down Expand Up @@ -51,6 +52,9 @@ func ValidateKubeletConfiguration(kc *kubeletconfig.KubeletConfiguration) error
if kc.HealthzPort != 0 && utilvalidation.IsValidPortNum(int(kc.HealthzPort)) != nil {
allErrors = append(allErrors, fmt.Errorf("invalid configuration: HealthzPort (--healthz-port) %v must be between 1 and 65535, inclusive", kc.HealthzPort))
}
if localFeatureGate.Enabled(features.CPUCFSQuotaPeriod) && utilvalidation.IsInRange(int(kc.CPUCFSQuotaPeriod.Duration), int(1*time.Microsecond), int(time.Second)) != nil {
allErrors = append(allErrors, fmt.Errorf("invalid configuration: CPUCFSQuotaPeriod (--cpu-cfs-quota-period) %v must be between 1usec and 1sec, inclusive", kc.CPUCFSQuotaPeriod))
}
if utilvalidation.IsInRange(int(kc.ImageGCHighThresholdPercent), 0, 100) != nil {
allErrors = append(allErrors, fmt.Errorf("invalid configuration: ImageGCHighThresholdPercent (--image-gc-high-threshold) %v must be between 0 and 100, inclusive", kc.ImageGCHighThresholdPercent))
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/kubelet/apis/kubeletconfig/validation/validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ package validation

import (
"testing"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
)
Expand Down Expand Up @@ -47,6 +49,7 @@ func TestValidateKubeletConfiguration(t *testing.T) {
RegistryBurst: 10,
RegistryPullQPS: 5,
HairpinMode: kubeletconfig.PromiscuousBridge,
CPUCFSQuotaPeriod: metav1.Duration{Duration: 100 * time.Millisecond},
}
if allErrors := ValidateKubeletConfiguration(successCase); allErrors != nil {
t.Errorf("expect no errors got %v", allErrors)
Expand Down Expand Up @@ -75,6 +78,7 @@ func TestValidateKubeletConfiguration(t *testing.T) {
RegistryBurst: -10,
RegistryPullQPS: -10,
HairpinMode: "foo",
CPUCFSQuotaPeriod: metav1.Duration{Duration: 0},
}
if allErrors := ValidateKubeletConfiguration(errorCase); len(allErrors.(utilerrors.Aggregate).Errors()) != 21 {
t.Errorf("expect 21 errors got %v", len(allErrors.(utilerrors.Aggregate).Errors()))
Expand Down
1 change: 1 addition & 0 deletions pkg/kubelet/apis/kubeletconfig/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions pkg/kubelet/cm/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,14 @@ go_test(
embed = [":go_default_library"],
deps = select({
"@io_bazel_rules_go//go/platform:linux": [
"//pkg/features:go_default_library",
"//pkg/kubelet/eviction/api:go_default_library",
"//pkg/util/mount:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature/testing:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/github.com/stretchr/testify/require:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
Expand Down
1 change: 1 addition & 0 deletions pkg/kubelet/cm/container_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ type NodeConfig struct {
ExperimentalCPUManagerReconcilePeriod time.Duration
ExperimentalPodPidsLimit int64
EnforceCPULimits bool
CPUCFSQuotaPeriod time.Duration
}

type NodeAllocatableConfig struct {
Expand Down
1 change: 1 addition & 0 deletions pkg/kubelet/cm/container_manager_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager {
cgroupManager: cm.cgroupManager,
podPidsLimit: cm.ExperimentalPodPidsLimit,
enforceCPULimits: cm.EnforceCPULimits,
cpuCFSQuotaPeriod: uint64(cm.CPUCFSQuotaPeriod / time.Microsecond),
}
}
return &podContainerManagerNoop{
Expand Down
19 changes: 11 additions & 8 deletions pkg/kubelet/cm/helpers_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@ import (

"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/api/v1/resource"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
kubefeatures "k8s.io/kubernetes/pkg/features"
)

const (
Expand All @@ -44,28 +46,29 @@ const (
)

// MilliCPUToQuota converts milliCPU to CFS quota and period values.
func MilliCPUToQuota(milliCPU int64) (quota int64, period uint64) {
func MilliCPUToQuota(milliCPU int64, period int64) (quota int64) {
// CFS quota is measured in two values:
// - cfs_period_us=100ms (the amount of time to measure usage across)
// - cfs_period_us=100ms (the amount of time to measure usage across given by period)
// - cfs_quota=20ms (the amount of cpu time allowed to be used across a period)
// so in the above example, you are limited to 20% of a single CPU
// for multi-cpu environments, you just scale equivalent amounts
// see https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt for details

if milliCPU == 0 {
return
}

// we set the period to 100ms by default
period = QuotaPeriod
if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.CPUCFSQuotaPeriod) {
period = QuotaPeriod
}

// we then convert your milliCPU to a value normalized over a period
quota = (milliCPU * QuotaPeriod) / MilliCPUToCPU
quota = (milliCPU * period) / MilliCPUToCPU

// quota needs to be a minimum of 1ms.
if quota < MinQuotaPeriod {
quota = MinQuotaPeriod
}

return
}

Expand Down Expand Up @@ -103,7 +106,7 @@ func HugePageLimits(resourceList v1.ResourceList) map[int64]int64 {
}

// ResourceConfigForPod takes the input pod and outputs the cgroup resource config.
func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool) *ResourceConfig {
func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64) *ResourceConfig {
// sum requests and limits.
reqs, limits := resource.PodRequestsAndLimits(pod)

Expand All @@ -122,7 +125,7 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool) *ResourceConfig {

// convert to CFS values
cpuShares := MilliCPUToShares(cpuRequests)
cpuQuota, cpuPeriod := MilliCPUToQuota(cpuLimits)
cpuQuota := MilliCPUToQuota(cpuLimits, int64(cpuPeriod))

// track if limits were applied for each resource.
memoryLimitsDeclared := true
Expand Down
Loading

0 comments on commit ad3771b

Please sign in to comment.