Merge pull request #27853 from dubstack/dubstack-inject-qos-creation2
Automatic merge from submit-queue

[Kubelet] Improving QOS in kubelet by introducing QoS level Cgroups - `--cgroups-per-qos`

This PR is tied to upstream issue #27204.
Please note that only the last commit is unique to this PR; the first two commits are from previous PRs.

It introduces a new Kubelet flag that specifies whether the QoS cgroup hierarchy should be created.

cc @kubernetes/sig-node
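
To make the intent concrete, here is a minimal sketch (not the kubelet's actual implementation) of how a pod's QoS class could map to a cgroup under the configured root once `--cgroups-per-qos` is enabled. The `qosCgroupPath` helper and the exact layout are assumptions for illustration only.

```go
package main

import (
	"fmt"
	"path"
)

// qosCgroupPath sketches the idea behind the flag: Burstable and BestEffort
// pods land under dedicated QoS-level cgroups below the configured root,
// while Guaranteed pods stay directly under the root. Illustrative only.
func qosCgroupPath(cgroupRoot, qosClass string) string {
	switch qosClass {
	case "Burstable", "BestEffort":
		return path.Join(cgroupRoot, qosClass)
	default: // Guaranteed
		return cgroupRoot
	}
}

func main() {
	fmt.Println(qosCgroupPath("/", "Burstable"))  // /Burstable
	fmt.Println(qosCgroupPath("/", "Guaranteed")) // /
}
```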
k8s-merge-robot committed Jul 15, 2016
2 parents 6193335 + 5000e74 commit b6c8790
Showing 24 changed files with 1,126 additions and 878 deletions.
1 change: 1 addition & 0 deletions Makefile
@@ -164,6 +164,7 @@ test-e2e: ginkgo generated_files
# Example:
# make test-e2e-node FOCUS=kubelet SKIP=container
# make test-e2e-node REMOTE=true DELETE_INSTANCES=true
# make test-e2e-node TEST_ARGS="--cgroups-per-qos=true"
# Build and run tests.
.PHONY: test-e2e-node
test-e2e-node: ginkgo generated_files
1 change: 1 addition & 0 deletions cmd/kubelet/app/options/options.go
@@ -133,6 +133,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.MarkDeprecated("system-container", "Use --system-cgroups instead. Will be removed in a future version.")
fs.StringVar(&s.SystemCgroups, "system-cgroups", s.SystemCgroups, "Optional absolute name of cgroups in which to place all non-kernel processes that are not already inside a cgroup under `/`. Empty for no container. Rolling back the flag requires a reboot. (Default: \"\").")

fs.BoolVar(&s.CgroupsPerQOS, "cgroups-per-qos", s.CgroupsPerQOS, "Enable creation of QoS cgroup hierarchy, if true top level QoS and pod cgroups are created.")
fs.StringVar(&s.CgroupRoot, "cgroup-root", s.CgroupRoot, "Optional root cgroup to use for pods. This is handled by the container runtime on a best effort basis. Default: '', which means use the container runtime default.")
fs.StringVar(&s.ContainerRuntime, "container-runtime", s.ContainerRuntime, "The container runtime to use. Possible values: 'docker', 'rkt'. Default: 'docker'.")
fs.DurationVar(&s.RuntimeRequestTimeout.Duration, "runtime-request-timeout", s.RuntimeRequestTimeout.Duration, "Timeout of all runtime requests except long running request - pull, logs, exec and attach. When timeout exceeded, kubelet will cancel the request, throw out an error and retry later. Default: 2m0s")
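
For readers unfamiliar with the pattern used in `AddFlags` above, here is a small standalone sketch, assuming only the spf13/pflag package: a boolean flag bound this way takes the struct field's current value as its default, and parsing writes directly back into the field.

```go
package main

import (
	"fmt"

	"github.com/spf13/pflag"
)

// config is a stand-in for the much larger KubeletServer struct.
type config struct {
	CgroupsPerQOS bool
}

func main() {
	cfg := &config{CgroupsPerQOS: false}

	fs := pflag.NewFlagSet("kubelet", pflag.ExitOnError)
	// The field's current value (false) becomes the flag's default.
	fs.BoolVar(&cfg.CgroupsPerQOS, "cgroups-per-qos", cfg.CgroupsPerQOS,
		"Enable creation of QoS cgroup hierarchy.")

	// Simulate passing the flag on the command line.
	if err := fs.Parse([]string{"--cgroups-per-qos=true"}); err != nil {
		panic(err)
	}
	fmt.Println(cfg.CgroupsPerQOS) // true
}
```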
7 changes: 6 additions & 1 deletion cmd/kubelet/app/server.go
@@ -220,6 +220,7 @@ func UnsecuredKubeletConfig(s *options.KubeletServer) (*KubeletConfig, error) {
EnableControllerAttachDetach: s.EnableControllerAttachDetach,
EnableCustomMetrics: s.EnableCustomMetrics,
EnableDebuggingHandlers: s.EnableDebuggingHandlers,
CgroupsPerQOS: s.CgroupsPerQOS,
EnableServer: s.EnableServer,
EventBurst: int(s.EventBurst),
EventRecordQPS: float32(s.EventRecordQPS),
@@ -363,12 +364,13 @@ func run(s *options.KubeletServer, kcfg *KubeletConfig) (err error) {
if kcfg.SystemCgroups != "" && kcfg.CgroupRoot == "" {
return fmt.Errorf("invalid configuration: system container was specified and cgroup root was not specified")
}

kcfg.ContainerManager, err = cm.NewContainerManager(kcfg.Mounter, kcfg.CAdvisorInterface, cm.NodeConfig{
RuntimeCgroupsName: kcfg.RuntimeCgroups,
SystemCgroupsName: kcfg.SystemCgroups,
KubeletCgroupsName: kcfg.KubeletCgroups,
ContainerRuntime: kcfg.ContainerRuntime,
CgroupsPerQOS: kcfg.CgroupsPerQOS,
CgroupRoot: kcfg.CgroupRoot,
})
if err != nil {
return err
@@ -575,6 +577,7 @@ func SimpleKubelet(client *clientset.Clientset,
EnableCustomMetrics: false,
EnableDebuggingHandlers: true,
EnableServer: true,
CgroupsPerQOS: false,
FileCheckFrequency: fileCheckFrequency,
// Since this kubelet runs with --configure-cbr0=false, it needs to use
// hairpin-veth to allow hairpin packets. Note that this deviates from
@@ -798,6 +801,7 @@ type KubeletConfig struct {
EnableControllerAttachDetach bool
EnableCustomMetrics bool
EnableDebuggingHandlers bool
CgroupsPerQOS bool
EnableServer bool
EventClient *clientset.Clientset
EventBurst int
@@ -929,6 +933,7 @@ func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.Pod
kc.NodeLabels,
kc.NodeStatusUpdateFrequency,
kc.OSInterface,
kc.CgroupsPerQOS,
kc.CgroupRoot,
kc.ContainerRuntime,
kc.RuntimeRequestTimeout,
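
The server.go changes above thread the new setting through three layers. A condensed, self-contained sketch of that flow, with the real constructors elided and only the fields relevant to this PR kept, looks roughly like this:

```go
package main

import (
	"errors"
	"fmt"
)

// Simplified stand-ins for the real KubeletServer, KubeletConfig and
// cm.NodeConfig types.
type kubeletServer struct {
	CgroupsPerQOS bool
	SystemCgroups string
	CgroupRoot    string
}

type kubeletConfig struct {
	CgroupsPerQOS bool
	SystemCgroups string
	CgroupRoot    string
}

type nodeConfig struct {
	CgroupsPerQOS bool
	CgroupRoot    string
}

func buildNodeConfig(s kubeletServer) (nodeConfig, error) {
	// Flag values are first copied into the kubelet config...
	kcfg := kubeletConfig{
		CgroupsPerQOS: s.CgroupsPerQOS,
		SystemCgroups: s.SystemCgroups,
		CgroupRoot:    s.CgroupRoot,
	}
	// ...validated, as in run(): a system container needs a cgroup root...
	if kcfg.SystemCgroups != "" && kcfg.CgroupRoot == "" {
		return nodeConfig{}, errors.New("invalid configuration: system container was specified and cgroup root was not specified")
	}
	// ...and finally handed to the container manager's node config.
	return nodeConfig{
		CgroupsPerQOS: kcfg.CgroupsPerQOS,
		CgroupRoot:    kcfg.CgroupRoot,
	}, nil
}

func main() {
	nc, err := buildNodeConfig(kubeletServer{CgroupsPerQOS: true, CgroupRoot: "/"})
	fmt.Println(nc, err) // {true /} <nil>
}
```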
8 changes: 8 additions & 0 deletions docs/devel/e2e-node-tests.md
@@ -205,6 +205,14 @@ less useful for catching flakes related creating the instance from an image.**
make test-e2e-node REMOTE=true RUN_UNTIL_FAILURE=true
```

## Additional QoS Cgroups Hierarchy level testing

For testing with the QoS cgroup hierarchy enabled, you can pass the --cgroups-per-qos flag as an argument to Ginkgo using TEST_ARGS:

```sh
make test-e2e-node TEST_ARGS="--cgroups-per-qos=true"
```

# Notes on tests run by the Kubernetes project during pre-, post- submit.

The node e2e tests are run by the PR builder for each Pull Request and the results published at
6 changes: 4 additions & 2 deletions hack/make-rules/test-e2e-node.sh
@@ -33,6 +33,7 @@ cleanup=${CLEANUP:-"true"}
delete_instances=${DELETE_INSTANCES:-"false"}
run_until_failure=${RUN_UNTIL_FAILURE:-"false"}
list_images=${LIST_IMAGES:-"false"}
test_args=${TEST_ARGS:-""}

if [[ $list_images == "true" ]]; then
gcloud compute images list --project="${image_project}" | grep "e2e-node"
@@ -117,7 +118,7 @@ if [ $remote = true ] ; then
--hosts="$hosts" --images="$images" --cleanup="$cleanup" \
--results-dir="$artifacts" --ginkgo-flags="$ginkgoflags" \
--image-project="$image_project" --instance-name-prefix="$instance_prefix" --setup-node="true" \
--delete-instances="$delete_instances"
--delete-instances="$delete_instances" --test_args="$test_args"
exit $?

else
@@ -129,6 +130,7 @@ else
# Test using the host the script was run on
# Provided for backwards compatibility
"${ginkgo}" --focus=$focus --skip=$skip "${KUBE_ROOT}/test/e2e_node/" --report-dir=${report} \
-- --alsologtostderr --v 2 --node-name $(hostname) --disable-kubenet=true --build-services=true --start-services=true --stop-services=true
-- --alsologtostderr --v 2 --node-name $(hostname) --disable-kubenet=true --build-services=true \
--start-services=true --stop-services=true "$test_args"
exit $?
fi
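
On the receiving end, an argument forwarded through TEST_ARGS only takes effect if the test binary defines a matching flag. The actual e2e_node wiring is not part of this diff; the sketch below is purely illustrative of that mechanism.

```go
package main

import (
	"flag"
	"fmt"
)

// Hypothetical flag definition in a test binary: anything forwarded via
// TEST_ARGS must correspond to a flag like this, or parsing will fail.
var cgroupsPerQOS = flag.Bool("cgroups-per-qos", false,
	"Run node e2e tests against a kubelet with the QoS cgroup hierarchy enabled.")

func main() {
	flag.Parse()
	fmt.Println("cgroups-per-qos:", *cgroupsPerQOS)
}
```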
1 change: 1 addition & 0 deletions hack/verify-flags/known-flags.txt
@@ -42,6 +42,7 @@ build-tag
cadvisor-port
cert-dir
certificate-authority
cgroups-per-qos
cgroup-root
chaos-chance
clean-start
