Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow for not-ready nodes in e2e test #32108

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions hack/verify-flags/known-flags.txt
Expand Up @@ -8,6 +8,7 @@ algorithm-provider
all-namespaces
allocate-node-cidrs
allow-privileged
allowed-not-ready-nodes
api-burst
api-prefix
api-rate
Expand Down
2 changes: 2 additions & 0 deletions test/e2e/framework/test_context.go
Expand Up @@ -52,6 +52,7 @@ type TestContextType struct {
NodeOSDistro string
VerifyServiceAccount bool
DeleteNamespace bool
AllowedNotReadyNodes int
CleanStart bool
// If set to 'true' or 'all' framework will start a goroutine monitoring resource usage of system add-ons.
// It will read the data every 30 seconds from all Nodes and print summary during afterEach. If set to 'master'
Expand Down Expand Up @@ -120,6 +121,7 @@ func RegisterCommonFlags() {
flag.StringVar(&TestContext.OutputPrintType, "output-print-type", "hr", "Comma separated list: 'hr' for human readable summaries 'json' for JSON ones.")
flag.BoolVar(&TestContext.DumpLogsOnFailure, "dump-logs-on-failure", true, "If set to true test will dump data about the namespace in which test was running.")
flag.BoolVar(&TestContext.DeleteNamespace, "delete-namespace", true, "If true tests will delete namespace after completion. It is only designed to make debugging easier, DO NOT turn it off by default.")
flag.IntVar(&TestContext.AllowedNotReadyNodes, "allowed-not-ready-nodes", 0, "If non-zero, framework will allow for that many non-ready nodes when checking for all ready nodes.")
flag.StringVar(&TestContext.Host, "host", "http://127.0.0.1:8080", "The host, or apiserver, to connect to")
flag.StringVar(&TestContext.ReportPrefix, "report-prefix", "", "Optional prefix for JUnit XML reports. Default is empty, which doesn't prepend anything to the default name.")
flag.StringVar(&TestContext.ReportDir, "report-dir", "", "Path to the directory where the JUnit XML reports should be saved. Default is empty, which doesn't generate these reports.")
Expand Down
36 changes: 32 additions & 4 deletions test/e2e/framework/util.go
Expand Up @@ -4189,27 +4189,55 @@ func WaitForNodeToBe(c *client.Client, name string, conditionType api.NodeCondit
return false
}

// Checks whether not-ready nodes can be ignored while checking if all nodes are
// ready (we allow e.g. for incorrect provisioning of some small percentage of nodes
// while validating cluster, and those nodes may never become healthy).
// Currently we allow only for:
// - not present CNI plugins on node
// TODO: we should extend it for other reasons.
func allowedNotReadyReasons(nodes []*api.Node) bool {
for _, node := range nodes {
index, condition := api.GetNodeCondition(&node.Status, api.NodeReady)
if index == -1 ||
!strings.Contains(condition.Reason, "could not locate kubenet required CNI plugins") {
return false
}
}
return true
}

// Checks whether all registered nodes are ready.
// TODO: we should change the AllNodesReady call in AfterEach to WaitForAllNodesHealthy,
// and figure out how to do it in a configurable way, as we can't expect all setups to run
// default test add-ons.
func AllNodesReady(c *client.Client, timeout time.Duration) error {
Logf("Waiting up to %v for all nodes to be ready", timeout)

var notReady []api.Node
var notReady []*api.Node
err := wait.PollImmediate(Poll, timeout, func() (bool, error) {
notReady = nil
// It should be OK to list unschedulable Nodes here.
nodes, err := c.Nodes().List(api.ListOptions{})
if err != nil {
return false, err
}
for _, node := range nodes.Items {
if !IsNodeConditionSetAsExpected(&node, api.NodeReady, true) {
for i := range nodes.Items {
node := &nodes.Items[i]
if !IsNodeConditionSetAsExpected(node, api.NodeReady, true) {
notReady = append(notReady, node)
}
}
return len(notReady) == 0, nil
// Framework allows for <TestContext.AllowedNotReadyNodes> nodes to be non-ready,
// to make it possible e.g. for incorrect deployment of some small percentage
// of nodes (which we allow in cluster validation). Some nodes that are not
// provisioned correctly at startup will never become ready (e.g. when something
// won't install correctly), so we can't expect them to be ready at any point.
//
// However, we only allow non-ready nodes with some specific reasons.
if len(notReady) > TestContext.AllowedNotReadyNodes {
return false, nil
}
return allowedNotReadyReasons(notReady), nil
})

if err != nil && err != wait.ErrWaitTimeout {
Expand Down