From 97ab99aaa04ccc38861ce534be42a1160a7d48e3 Mon Sep 17 00:00:00 2001
From: David Ashpole
Date: Mon, 28 Aug 2017 11:17:38 -0700
Subject: [PATCH] remove OutOfDisk from controllers

---
 pkg/controller/daemon/BUILD                   |   2 -
 pkg/controller/daemon/daemon_controller.go    |  31 +-
 .../daemon/daemon_controller_test.go          | 146 +---------
 pkg/controller/daemon/util/daemonset_util.go  |   1 +
 pkg/controller/node/node_controller.go        |   1 -
 pkg/controller/node/nodecontroller_test.go    |  58 ----
 test/e2e/node/BUILD                           |  13 +-
 test/e2e/node/nodeoutofdisk.go                | 269 ------------------
 8 files changed, 7 insertions(+), 514 deletions(-)
 delete mode 100644 test/e2e/node/nodeoutofdisk.go

diff --git a/pkg/controller/daemon/BUILD b/pkg/controller/daemon/BUILD
index c013633afa73..cedbe39e1245 100644
--- a/pkg/controller/daemon/BUILD
+++ b/pkg/controller/daemon/BUILD
@@ -74,8 +74,6 @@ go_test(
         "//pkg/securitycontext:go_default_library",
         "//pkg/util/labels:go_default_library",
         "//plugin/pkg/scheduler/algorithm:go_default_library",
-        "//plugin/pkg/scheduler/algorithm/predicates:go_default_library",
-        "//plugin/pkg/scheduler/schedulercache:go_default_library",
         "//vendor/k8s.io/api/core/v1:go_default_library",
         "//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
diff --git a/pkg/controller/daemon/daemon_controller.go b/pkg/controller/daemon/daemon_controller.go
index 9878b9972278..4dc0ed16b34a 100644
--- a/pkg/controller/daemon/daemon_controller.go
+++ b/pkg/controller/daemon/daemon_controller.go
@@ -1180,6 +1180,7 @@ func (dsc *DaemonSetsController) simulate(newPod *v1.Pod, node *v1.Node, ds *ext
 		Effect: v1.TaintEffectNoSchedule,
 	})
 
+	// TODO(#48843) OutOfDisk taints will be removed in 1.10
 	if utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
 		kubelettypes.IsCriticalPod(newPod) {
 		v1helper.AddOrUpdateTolerationInPod(newPod, &v1.Toleration{
@@ -1221,7 +1222,7 @@ func (dsc *DaemonSetsController) simulate(newPod *v1.Pod, node *v1.Node, ds *ext
 // summary. Returned booleans are:
 // * wantToRun:
 //     Returns true when a user would expect a pod to run on this node and ignores conditions
-//     such as OutOfDisk or insufficient resource that would cause a daemonset pod not to schedule.
+//     such as DiskPressure or insufficient resource that would cause a daemonset pod not to schedule.
 //     This is primarily used to populate daemonset status.
 // * shouldSchedule:
 //     Returns true when a daemonset should be scheduled to a node if a daemonset pod is not already
@@ -1257,11 +1258,6 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *exten
 		var emitEvent bool
 		// we try to partition predicates into two partitions here: intentional on the part of the operator and not.
 		switch reason {
-		case predicates.ErrNodeOutOfDisk:
-			// the kubelet will evict this pod if it needs to. Let kubelet
-			// decide whether to continue running this pod so leave shouldContinueRunning
-			// set to true
-			shouldSchedule = false
 		// intentional
 		case
 			predicates.ErrNodeSelectorNotMatch,
@@ -1344,9 +1340,6 @@ func Predicates(pod *v1.Pod, nodeInfo *schedulercache.NodeInfo) (bool, []algorit
 		fit, reasons, err = predicates.EssentialPredicates(pod, nil, nodeInfo)
 	} else {
 		fit, reasons, err = predicates.GeneralPredicates(pod, nil, nodeInfo)
-		ncFit, ncReasons := NodeConditionPredicates(nodeInfo)
-		fit = ncFit && fit
-		reasons = append(reasons, ncReasons...)
 	}
 	if err != nil {
 		return false, predicateFails, err
@@ -1358,26 +1351,6 @@ func Predicates(pod *v1.Pod, nodeInfo *schedulercache.NodeInfo) (bool, []algorit
 	return len(predicateFails) == 0, predicateFails, nil
 }
 
-func NodeConditionPredicates(nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason) {
-	reasons := []algorithm.PredicateFailureReason{}
-
-	// If TaintNodesByCondition feature was enabled, account PodToleratesNodeTaints predicates.
-	if utilfeature.DefaultFeatureGate.Enabled(features.TaintNodesByCondition) {
-		return true, nil
-	}
-
-	for _, c := range nodeInfo.Node().Status.Conditions {
-		// TODO: There are other node status that the DaemonSet should ideally respect too,
-		// e.g. MemoryPressure, and DiskPressure
-		if c.Type == v1.NodeOutOfDisk && c.Status == v1.ConditionTrue {
-			reasons = append(reasons, predicates.ErrNodeOutOfDisk)
-			break
-		}
-	}
-
-	return len(reasons) == 0, reasons
-}
-
 // byCreationTimestamp sorts a list by creation timestamp, using their names as a tie breaker.
 type byCreationTimestamp []*extensions.DaemonSet
 
diff --git a/pkg/controller/daemon/daemon_controller_test.go b/pkg/controller/daemon/daemon_controller_test.go
index 509e43084e22..d5d4786173a2 100644
--- a/pkg/controller/daemon/daemon_controller_test.go
+++ b/pkg/controller/daemon/daemon_controller_test.go
@@ -47,8 +47,6 @@ import (
 	"k8s.io/kubernetes/pkg/securitycontext"
 	labelsutil "k8s.io/kubernetes/pkg/util/labels"
 	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
-	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
-	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
 )
 
 var (
@@ -503,20 +501,6 @@ func TestNotReadNodeDaemonDoesNotLaunchPod(t *testing.T) {
 	}
 }
 
-// DaemonSets should not place onto OutOfDisk nodes
-func TestOutOfDiskNodeDaemonDoesNotLaunchPod(t *testing.T) {
-	for _, strategy := range updateStrategies() {
-		ds := newDaemonSet("foo")
-		ds.Spec.UpdateStrategy = *strategy
-		manager, podControl, _ := newTestController(ds)
-		node := newNode("not-enough-disk", nil)
-		node.Status.Conditions = []v1.NodeCondition{{Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}
-		manager.nodeStore.Add(node)
-		manager.dsStore.Add(ds)
-		syncAndValidateDaemonSets(t, manager, ds, podControl, 0, 0, 0)
-	}
-}
-
 func resourcePodSpec(nodeName, memory, cpu string) v1.PodSpec {
 	return v1.PodSpec{
 		NodeName: nodeName,
@@ -1267,30 +1251,8 @@ func setDaemonSetToleration(ds *extensions.DaemonSet, tolerations []v1.Toleratio
 	ds.Spec.Template.Spec.Tolerations = tolerations
 }
 
-// DaemonSet should launch a critical pod even when the node is OutOfDisk.
-func TestOutOfDiskNodeDaemonLaunchesCriticalPod(t *testing.T) {
-	for _, strategy := range updateStrategies() {
-		ds := newDaemonSet("critical")
-		ds.Spec.UpdateStrategy = *strategy
-		setDaemonSetCritical(ds)
-		manager, podControl, _ := newTestController(ds)
-
-		node := newNode("not-enough-disk", nil)
-		node.Status.Conditions = []v1.NodeCondition{{Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}
-		manager.nodeStore.Add(node)
-
-		// Without enabling critical pod annotation feature gate, we shouldn't create critical pod
-		utilfeature.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=False")
-		manager.dsStore.Add(ds)
-		syncAndValidateDaemonSets(t, manager, ds, podControl, 0, 0, 0)
-
-		// Enabling critical pod annotation feature gate should create critical pod
-		utilfeature.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=True")
-		syncAndValidateDaemonSets(t, manager, ds, podControl, 1, 0, 0)
-	}
-}
-
 // DaemonSet should launch a critical pod even when the node with OutOfDisk taints.
+// TODO(#48843) OutOfDisk taints will be removed in 1.10
 func TestTaintOutOfDiskNodeDaemonLaunchesCriticalPod(t *testing.T) {
 	for _, strategy := range updateStrategies() {
 		ds := newDaemonSet("critical")
@@ -1454,23 +1416,6 @@ func TestNodeShouldRunDaemonPod(t *testing.T) {
 			shouldSchedule:        true,
 			shouldContinueRunning: true,
 		},
-		{
-			ds: &extensions.DaemonSet{
-				Spec: extensions.DaemonSetSpec{
-					Selector: &metav1.LabelSelector{MatchLabels: simpleDaemonSetLabel},
-					Template: v1.PodTemplateSpec{
-						ObjectMeta: metav1.ObjectMeta{
-							Labels: simpleDaemonSetLabel,
-						},
-						Spec: resourcePodSpec("", "50M", "0.5"),
-					},
-				},
-			},
-			nodeCondition:         []v1.NodeCondition{{Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}},
-			wantToRun:             true,
-			shouldSchedule:        false,
-			shouldContinueRunning: true,
-		},
 		{
 			ds: &extensions.DaemonSet{
 				Spec: extensions.DaemonSetSpec{
@@ -1612,41 +1557,6 @@ func TestUpdateNode(t *testing.T) {
 			ds:            newDaemonSet("ds"),
 			shouldEnqueue: true,
 		},
-		{
-			test: "Node conditions changed",
-			oldNode: func() *v1.Node {
-				node := newNode("node1", nil)
-				node.Status.Conditions = []v1.NodeCondition{
-					{Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue},
-				}
-				return node
-			}(),
-			newNode:       newNode("node1", nil),
-			ds:            newDaemonSet("ds"),
-			shouldEnqueue: true,
-		},
-		{
-			test: "Node conditions not changed",
-			oldNode: func() *v1.Node {
-				node := newNode("node1", nil)
-				node.Status.Conditions = []v1.NodeCondition{
-					{Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue},
-					{Type: v1.NodeMemoryPressure, Status: v1.ConditionFalse},
-					{Type: v1.NodeDiskPressure, Status: v1.ConditionFalse},
-					{Type: v1.NodeNetworkUnavailable, Status: v1.ConditionFalse},
-				}
-				return node
-			}(),
-			newNode: func() *v1.Node {
-				node := newNode("node1", nil)
-				node.Status.Conditions = []v1.NodeCondition{
-					{Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue},
-				}
-				return node
-			}(),
-			ds:            newDaemonSet("ds"),
-			shouldEnqueue: false,
-		},
 	}
 	for _, c := range cases {
 		for _, strategy := range updateStrategies() {
@@ -2205,57 +2115,3 @@ func getQueuedKeys(queue workqueue.RateLimitingInterface) []string {
 	sort.Strings(keys)
 	return keys
 }
-
-func TestPredicates(t *testing.T) {
-	type args struct {
-		pod  *v1.Pod
-		node *v1.Node
-	}
-	tests := []struct {
-		name    string
-		args    args
-		want    bool
-		wantRes []algorithm.PredicateFailureReason
-		wantErr bool
-	}{
-		{
-			name: "retrun OutOfDiskErr if outOfDisk",
-			args: args{
-				pod: newPod("pod1-", "node-0", nil, nil),
-				node: &v1.Node{
-					ObjectMeta: metav1.ObjectMeta{
-						Name: "node-0",
-					},
-					Status: v1.NodeStatus{
-						Conditions: []v1.NodeCondition{
-							{Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue},
-						},
-						Allocatable: v1.ResourceList{
-							v1.ResourcePods: resource.MustParse("100"),
-						},
-					},
-				},
-			},
-			want:    false,
-			wantRes: []algorithm.PredicateFailureReason{predicates.ErrNodeOutOfDisk},
-		},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			nodeInfo := schedulercache.NewNodeInfo(tt.args.pod)
-			nodeInfo.SetNode(tt.args.node)
-
-			got, res, err := Predicates(tt.args.pod, nodeInfo)
-			if (err != nil) != tt.wantErr {
-				t.Errorf("%s (error): error = %v, wantErr %v", tt.name, err, tt.wantErr)
-				return
-			}
-			if got != tt.want {
-				t.Errorf("%s (fit): got = %v, want %v", tt.name, got, tt.want)
-			}
-			if !reflect.DeepEqual(res, tt.wantRes) {
-				t.Errorf("%s (reasons): got = %v, want %v", tt.name, res, tt.wantRes)
-			}
-		})
-	}
-}
diff --git a/pkg/controller/daemon/util/daemonset_util.go b/pkg/controller/daemon/util/daemonset_util.go
index 58a467dbcddd..6ef70a3fac58 100644
--- a/pkg/controller/daemon/util/daemonset_util.go
+++ b/pkg/controller/daemon/util/daemonset_util.go
@@ -71,6 +71,7 @@ func CreatePodTemplate(template v1.PodTemplateSpec, generation int64, hash strin
 		Effect: v1.TaintEffectNoSchedule,
 	})
 
+	// TODO(#48843) OutOfDisk taints will be removed in 1.10
 	if utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
 		kubelettypes.IsCritical(newTemplate.Namespace, newTemplate.Annotations) {
 		v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
diff --git a/pkg/controller/node/node_controller.go b/pkg/controller/node/node_controller.go
index d04799b3a7e3..335345c7f8d8 100644
--- a/pkg/controller/node/node_controller.go
+++ b/pkg/controller/node/node_controller.go
@@ -985,7 +985,6 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
 
 		// remaining node conditions should also be set to Unknown
 		remainingNodeConditionTypes := []v1.NodeConditionType{
-			v1.NodeOutOfDisk,
 			v1.NodeMemoryPressure,
 			v1.NodeDiskPressure,
 			// We don't change 'NodeNetworkUnavailable' condition, as it's managed on a control plane level.
diff --git a/pkg/controller/node/nodecontroller_test.go b/pkg/controller/node/nodecontroller_test.go
index 13e738b75216..cb5b13602114 100644
--- a/pkg/controller/node/nodecontroller_test.go
+++ b/pkg/controller/node/nodecontroller_test.go
@@ -1455,14 +1455,6 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
 						LastHeartbeatTime:  metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
 						LastTransitionTime: fakeNow,
 					},
-					{
-						Type:               v1.NodeOutOfDisk,
-						Status:             v1.ConditionUnknown,
-						Reason:             "NodeStatusNeverUpdated",
-						Message:            "Kubelet never posted node status.",
-						LastHeartbeatTime:  metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
-						LastTransitionTime: fakeNow,
-					},
 					{
 						Type:               v1.NodeMemoryPressure,
 						Status:             v1.ConditionUnknown,
@@ -1520,13 +1512,6 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
 							LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 							LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 						},
-						{
-							Type:   v1.NodeOutOfDisk,
-							Status: v1.ConditionFalse,
-							// Node status hasn't been updated for 1hr.
-							LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-							LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-						},
 					},
 					Capacity: v1.ResourceList{
 						v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
@@ -1551,13 +1536,6 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
 						LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 						LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 					},
-					{
-						Type:   v1.NodeOutOfDisk,
-						Status: v1.ConditionFalse,
-						// Node status hasn't been updated for 1hr.
-						LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-						LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-					},
 				},
 				Capacity: v1.ResourceList{
 					v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
@@ -1580,14 +1558,6 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
 						LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 						LastTransitionTime: metav1.Time{Time: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC).Add(time.Hour)},
 					},
-					{
-						Type:               v1.NodeOutOfDisk,
-						Status:             v1.ConditionUnknown,
-						Reason:             "NodeStatusUnknown",
-						Message:            "Kubelet stopped posting node status.",
-						LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-						LastTransitionTime: metav1.Time{Time: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC).Add(time.Hour)},
-					},
 					{
 						Type:               v1.NodeMemoryPressure,
 						Status:             v1.ConditionUnknown,
@@ -1767,13 +1737,6 @@ func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
 						LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 						LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 					},
-					{
-						Type:   v1.NodeOutOfDisk,
-						Status: v1.ConditionFalse,
-						// Node status hasn't been updated for 1hr.
-						LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-						LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-					},
 				},
 				Capacity: v1.ResourceList{
 					v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
@@ -1800,13 +1763,6 @@ func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
 						LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 						LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 					},
-					{
-						Type:   v1.NodeOutOfDisk,
-						Status: v1.ConditionFalse,
-						// Node status hasn't been updated for 1hr.
-						LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-						LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-					},
 				},
 				Capacity: v1.ResourceList{
 					v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
@@ -1837,13 +1793,6 @@ func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
 						LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 						LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 					},
-					{
-						Type:   v1.NodeOutOfDisk,
-						Status: v1.ConditionFalse,
-						// Node status hasn't been updated for 1hr.
-						LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-						LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-					},
 				},
 				Capacity: v1.ResourceList{
 					v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
@@ -1870,13 +1819,6 @@ func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
 						LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 						LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
 					},
-					{
-						Type:   v1.NodeOutOfDisk,
-						Status: v1.ConditionFalse,
-						// Node status hasn't been updated for 1hr.
-						LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-						LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-					},
 				},
 				Capacity: v1.ResourceList{
 					v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
diff --git a/test/e2e/node/BUILD b/test/e2e/node/BUILD
index 343e5df55947..700e4000e8f3 100644
--- a/test/e2e/node/BUILD
+++ b/test/e2e/node/BUILD
@@ -1,9 +1,4 @@
-package(default_visibility = ["//visibility:public"])
-
-load(
-    "@io_bazel_rules_go//go:def.bzl",
-    "go_library",
-)
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
 
 go_library(
     name = "go_default_library",
@@ -12,9 +7,9 @@ go_library(
         "framework.go",
         "kubelet.go",
         "kubelet_perf.go",
-        "nodeoutofdisk.go",
         "security_context.go",
     ],
+    visibility = ["//visibility:public"],
     deps = [
         "//pkg/api/testapi:go_default_library",
         "//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
@@ -22,13 +17,10 @@ go_library(
         "//test/e2e/framework:go_default_library",
         "//test/utils:go_default_library",
         "//test/utils/image:go_default_library",
-        "//vendor/github.com/google/cadvisor/info/v1:go_default_library",
         "//vendor/github.com/onsi/ginkgo:go_default_library",
        "//vendor/github.com/onsi/gomega:go_default_library",
         "//vendor/k8s.io/api/core/v1:go_default_library",
-        "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
-        "//vendor/k8s.io/apimachinery/pkg/fields:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
@@ -47,4 +39,5 @@ filegroup(
     name = "all-srcs",
     srcs = [":package-srcs"],
     tags = ["automanaged"],
+    visibility = ["//visibility:public"],
 )
diff --git a/test/e2e/node/nodeoutofdisk.go b/test/e2e/node/nodeoutofdisk.go
deleted file mode 100644
index f2cf9dd2d2f6..000000000000
--- a/test/e2e/node/nodeoutofdisk.go
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
-Copyright 2015 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package node
-
-import (
-	"encoding/json"
-	"fmt"
-	"time"
-
-	cadvisorapi "github.com/google/cadvisor/info/v1"
-	"k8s.io/api/core/v1"
-	"k8s.io/apimachinery/pkg/api/resource"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/fields"
-	"k8s.io/apimachinery/pkg/util/wait"
-	clientset "k8s.io/client-go/kubernetes"
-	"k8s.io/kubernetes/test/e2e/framework"
-
-	. "github.com/onsi/ginkgo"
-	. "github.com/onsi/gomega"
-)
-
-const (
-	mb = 1024 * 1024
-	gb = 1024 * mb
-
-	// TODO(madhusudancs): find a way to query kubelet's disk space manager to obtain this value. 256MB
-	// is the default that is set today. This test might break if the default value changes. This value
-	// can be configured by setting the "low-diskspace-threshold-mb" flag while starting a kubelet.
-	// However, kubelets are started as part of the cluster start up, once, before any e2e test is run,
-	// and remain unchanged until all the tests are run and the cluster is brought down. Changing the
-	// flag value affects all the e2e tests. So we are hard-coding this value for now.
-	lowDiskSpaceThreshold uint64 = 256 * mb
-
-	nodeOODTimeOut = 5 * time.Minute
-
-	numNodeOODPods = 3
-)
-
-// Plan:
-// 1. Fill disk space on all nodes except one. One node is left out so that we can schedule pods
-//    on that node. Arbitrarily choose that node to be node with index 0. This makes this a disruptive test.
-// 2. Get the CPU capacity on unfilled node.
-// 3. Divide the available CPU into one less than the number of pods we want to schedule. We want
-//    to schedule 3 pods, so divide CPU capacity by 2.
-// 4. Request the divided CPU for each pod.
-// 5. Observe that 2 of the pods schedule onto the node whose disk is not full, and the remaining
-//    pod stays pending and does not schedule onto the nodes whose disks are full nor the node
-//    with the other two pods, since there is not enough free CPU capacity there.
-// 6. Recover disk space from one of the nodes whose disk space was previously filled. Arbritrarily
-//    choose that node to be node with index 1.
-// 7. Observe that the pod in pending status schedules on that node.
-//
-// Flaky issue #20015. We have no clear path for how to test this functionality in a non-flaky way.
-var _ = SIGDescribe("NodeOutOfDisk [Serial] [Flaky] [Disruptive]", func() {
-	var c clientset.Interface
-	var unfilledNodeName, recoveredNodeName string
-	f := framework.NewDefaultFramework("node-outofdisk")
-
-	BeforeEach(func() {
-		c = f.ClientSet
-
-		framework.Skipf("test is broken. #40249")
-
-		nodelist := framework.GetReadySchedulableNodesOrDie(c)
-
-		// Skip this test on small clusters. No need to fail since it is not a use
-		// case that any cluster of small size needs to support.
-		framework.SkipUnlessNodeCountIsAtLeast(2)
-
-		unfilledNodeName = nodelist.Items[0].Name
-		for _, node := range nodelist.Items[1:] {
-			fillDiskSpace(c, &node)
-		}
-	})
-
-	AfterEach(func() {
-
-		nodelist := framework.GetReadySchedulableNodesOrDie(c)
-		Expect(len(nodelist.Items)).ToNot(BeZero())
-		for _, node := range nodelist.Items {
-			if unfilledNodeName == node.Name || recoveredNodeName == node.Name {
-				continue
-			}
-			recoverDiskSpace(c, &node)
-		}
-	})
-
-	It("runs out of disk space", func() {
-		unfilledNode, err := c.Core().Nodes().Get(unfilledNodeName, metav1.GetOptions{})
-		framework.ExpectNoError(err)
-
-		By(fmt.Sprintf("Calculating CPU availability on node %s", unfilledNode.Name))
-		milliCpu, err := availCpu(c, unfilledNode)
-		framework.ExpectNoError(err)
-
-		// Per pod CPU should be just enough to fit only (numNodeOODPods - 1) pods on the given
-		// node. We compute this value by dividing the available CPU capacity on the node by
-		// (numNodeOODPods - 1) and subtracting ϵ from it. We arbitrarily choose ϵ to be 1%
-		// of the available CPU per pod, i.e. 0.01 * milliCpu/(numNodeOODPods-1). Instead of
-		// subtracting 1% from the value, we directly use 0.99 as the multiplier.
-		podCPU := int64(float64(milliCpu/(numNodeOODPods-1)) * 0.99)
-
-		ns := f.Namespace.Name
-		podClient := c.Core().Pods(ns)
-
-		By("Creating pods and waiting for all but one pods to be scheduled")
-
-		for i := 0; i < numNodeOODPods-1; i++ {
-			name := fmt.Sprintf("pod-node-outofdisk-%d", i)
-			createOutOfDiskPod(c, ns, name, podCPU)
-
-			framework.ExpectNoError(f.WaitForPodRunning(name))
-			pod, err := podClient.Get(name, metav1.GetOptions{})
-			framework.ExpectNoError(err)
-			Expect(pod.Spec.NodeName).To(Equal(unfilledNodeName))
-		}
-
-		pendingPodName := fmt.Sprintf("pod-node-outofdisk-%d", numNodeOODPods-1)
-		createOutOfDiskPod(c, ns, pendingPodName, podCPU)
-
-		By(fmt.Sprintf("Finding a failed scheduler event for pod %s", pendingPodName))
-		wait.Poll(2*time.Second, 5*time.Minute, func() (bool, error) {
-			selector := fields.Set{
-				"involvedObject.kind":      "Pod",
-				"involvedObject.name":      pendingPodName,
-				"involvedObject.namespace": ns,
-				"source":                   v1.DefaultSchedulerName,
-				"reason":                   "FailedScheduling",
-			}.AsSelector().String()
-			options := metav1.ListOptions{FieldSelector: selector}
-			schedEvents, err := c.Core().Events(ns).List(options)
-			framework.ExpectNoError(err)
-
-			if len(schedEvents.Items) > 0 {
-				return true, nil
-			} else {
-				return false, nil
-			}
-		})
-
-		nodelist := framework.GetReadySchedulableNodesOrDie(c)
-		Expect(len(nodelist.Items)).To(BeNumerically(">", 1))
-
-		nodeToRecover := nodelist.Items[1]
-		Expect(nodeToRecover.Name).ToNot(Equal(unfilledNodeName))
-
-		recoverDiskSpace(c, &nodeToRecover)
-		recoveredNodeName = nodeToRecover.Name
-
-		By(fmt.Sprintf("Verifying that pod %s schedules on node %s", pendingPodName, recoveredNodeName))
-		framework.ExpectNoError(f.WaitForPodRunning(pendingPodName))
-		pendingPod, err := podClient.Get(pendingPodName, metav1.GetOptions{})
-		framework.ExpectNoError(err)
-		Expect(pendingPod.Spec.NodeName).To(Equal(recoveredNodeName))
-	})
-})
-
-// createOutOfDiskPod creates a pod in the given namespace with the requested amount of CPU.
-func createOutOfDiskPod(c clientset.Interface, ns, name string, milliCPU int64) {
-	podClient := c.Core().Pods(ns)
-
-	pod := &v1.Pod{
-		ObjectMeta: metav1.ObjectMeta{
-			Name: name,
-		},
-		Spec: v1.PodSpec{
-			Containers: []v1.Container{
-				{
-					Name:  "pause",
-					Image: framework.GetPauseImageName(c),
-					Resources: v1.ResourceRequirements{
-						Requests: v1.ResourceList{
-							// Request enough CPU to fit only two pods on a given node.
-							v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
-						},
-					},
-				},
-			},
-		},
-	}
-
-	_, err := podClient.Create(pod)
-	framework.ExpectNoError(err)
-}
-
-// availCpu calculates the available CPU on a given node by subtracting the CPU requested by
-// all the pods from the total available CPU capacity on the node.
-func availCpu(c clientset.Interface, node *v1.Node) (int64, error) {
-	podClient := c.Core().Pods(metav1.NamespaceAll)
-
-	selector := fields.Set{"spec.nodeName": node.Name}.AsSelector().String()
-	options := metav1.ListOptions{FieldSelector: selector}
-	pods, err := podClient.List(options)
-	if err != nil {
-		return 0, fmt.Errorf("failed to retrieve all the pods on node %s: %v", node.Name, err)
-	}
-	avail := node.Status.Capacity.Cpu().MilliValue()
-	for _, pod := range pods.Items {
-		for _, cont := range pod.Spec.Containers {
-			avail -= cont.Resources.Requests.Cpu().MilliValue()
-		}
-	}
-	return avail, nil
-}
-
-// availSize returns the available disk space on a given node by querying node stats which
-// is in turn obtained internally from cadvisor.
-func availSize(c clientset.Interface, node *v1.Node) (uint64, error) {
-	statsResource := fmt.Sprintf("api/v1/proxy/nodes/%s/stats/", node.Name)
-	framework.Logf("Querying stats for node %s using url %s", node.Name, statsResource)
-	res, err := c.Core().RESTClient().Get().AbsPath(statsResource).Timeout(time.Minute).Do().Raw()
-	if err != nil {
-		return 0, fmt.Errorf("error querying cAdvisor API: %v", err)
-	}
-	ci := cadvisorapi.ContainerInfo{}
-	err = json.Unmarshal(res, &ci)
-	if err != nil {
-		return 0, fmt.Errorf("couldn't unmarshal container info: %v", err)
-	}
-	return ci.Stats[len(ci.Stats)-1].Filesystem[0].Available, nil
-}
-
-// fillDiskSpace fills the available disk space on a given node by creating a large file. The disk
-// space on the node is filled in such a way that the available space after filling the disk is just
-// below the lowDiskSpaceThreshold mark.
-func fillDiskSpace(c clientset.Interface, node *v1.Node) {
-	avail, err := availSize(c, node)
-	framework.ExpectNoError(err, "Node %s: couldn't obtain available disk size %v", node.Name, err)
-
-	fillSize := (avail - lowDiskSpaceThreshold + (100 * mb))
-
-	framework.Logf("Node %s: disk space available %d bytes", node.Name, avail)
-	By(fmt.Sprintf("Node %s: creating a file of size %d bytes to fill the available disk space", node.Name, fillSize))
-
-	cmd := fmt.Sprintf("fallocate -l %d test.img", fillSize)
-	framework.ExpectNoError(framework.IssueSSHCommand(cmd, framework.TestContext.Provider, node))
-
-	ood := framework.WaitForNodeToBe(c, node.Name, v1.NodeOutOfDisk, true, nodeOODTimeOut)
-	Expect(ood).To(BeTrue(), "Node %s did not run out of disk within %v", node.Name, nodeOODTimeOut)
-
-	avail, err = availSize(c, node)
-	framework.Logf("Node %s: disk space available %d bytes", node.Name, avail)
-	Expect(avail < lowDiskSpaceThreshold).To(BeTrue())
-}
-
-// recoverDiskSpace recovers disk space, filled by creating a large file, on a given node.
-func recoverDiskSpace(c clientset.Interface, node *v1.Node) {
-	By(fmt.Sprintf("Recovering disk space on node %s", node.Name))
-	cmd := "rm -f test.img"
-	framework.ExpectNoError(framework.IssueSSHCommand(cmd, framework.TestContext.Provider, node))
-
-	ood := framework.WaitForNodeToBe(c, node.Name, v1.NodeOutOfDisk, false, nodeOODTimeOut)
-	Expect(ood).To(BeTrue(), "Node %s's out of disk condition status did not change to false within %v", node.Name, nodeOODTimeOut)
-}
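The hunks above remove the last places where controllers read the OutOfDisk node condition directly; after this patch, out-of-disk avoidance flows through node taints and pod tolerations instead (see the TODO(#48843) comments and the critical-pod toleration paths that remain). The snippet below is an illustrative sketch only, not part of the patch or the controller's code: it shows the basic taint/toleration matching rule that a condition check gets replaced by. The helper name toleratesTaint and the example taint key "node.kubernetes.io/out-of-disk" are assumptions for the example.

package main

import (
	"fmt"

	"k8s.io/api/core/v1"
)

// toleratesTaint reports whether a single toleration matches a single taint,
// following the key/operator/value/effect matching rule used by Kubernetes.
// Simplified sketch for illustration; not the controller's actual code path.
func toleratesTaint(tol v1.Toleration, taint v1.Taint) bool {
	// An empty toleration effect matches any taint effect.
	if tol.Effect != "" && tol.Effect != taint.Effect {
		return false
	}
	// An empty toleration key (with operator Exists) matches any taint key.
	if tol.Key != "" && tol.Key != taint.Key {
		return false
	}
	switch tol.Operator {
	case v1.TolerationOpExists:
		return true
	case v1.TolerationOpEqual, "":
		// An unset operator defaults to Equal.
		return tol.Value == taint.Value
	}
	return false
}

func main() {
	// Example key only; the exact out-of-disk taint key is an assumption here.
	taint := v1.Taint{Key: "node.kubernetes.io/out-of-disk", Effect: v1.TaintEffectNoSchedule}
	critical := v1.Toleration{
		Key:      "node.kubernetes.io/out-of-disk",
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	}
	// A daemon pod carrying this toleration still schedules onto the tainted node.
	fmt.Println(toleratesTaint(critical, taint)) // true
}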