-
Notifications
You must be signed in to change notification settings - Fork 453
/
default.go
153 lines (130 loc) · 6.01 KB
/
default.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package check
import (
"context"
"time"
"github.com/siderolabs/talos/pkg/conditions"
"github.com/siderolabs/talos/pkg/machinery/config/machine"
)
// DefaultClusterChecks returns a set of default Talos cluster readiness checks.
//
// The result starts with every check produced by PreBootSequenceChecks and
// continues with the Kubernetes-level checks declared below, in that order.
func DefaultClusterChecks() []ClusterCheck {
	// poll builds a ClusterCheck which, once given the cluster, wraps assert
	// into a polling condition; description and both durations are handed
	// over to conditions.PollingCondition unchanged.
	poll := func(
		description string,
		timeout, interval time.Duration,
		assert func(ctx context.Context, cluster ClusterInfo) error,
	) ClusterCheck {
		return func(cluster ClusterInfo) conditions.Condition {
			return conditions.PollingCondition(description, func(ctx context.Context) error {
				return assert(ctx, cluster)
			}, timeout, interval)
		}
	}

	checks := PreBootSequenceChecks()

	return append(checks,
		// all the nodes should report in at k8s level
		//
		// more time is given per each attempt here, as this check is going to build and cache kubeconfig
		poll("all k8s nodes to report", 5*time.Minute, 30*time.Second,
			func(ctx context.Context, cluster ClusterInfo) error {
				return K8sAllNodesReportedAssertion(ctx, cluster)
			},
		),

		// all the nodes should report ready at k8s level
		poll("all k8s nodes to report ready", 10*time.Minute, 5*time.Second,
			func(ctx context.Context, cluster ClusterInfo) error {
				return K8sAllNodesReadyAssertion(ctx, cluster)
			},
		),

		// k8s control plane static pods
		poll("all control plane static pods to be running", 5*time.Minute, 5*time.Second,
			func(ctx context.Context, cluster ClusterInfo) error {
				return K8sControlPlaneStaticPods(ctx, cluster)
			},
		),

		// HA k8s control plane
		poll("all control plane components to be ready", 5*time.Minute, 5*time.Second,
			func(ctx context.Context, cluster ClusterInfo) error {
				return K8sFullControlPlaneAssertion(ctx, cluster)
			},
		),

		// kube-proxy pods should report ready; the assertion is skipped
		// (conditions.ErrSkipAssertion) when DaemonSetPresent reports false
		poll("kube-proxy to report ready", 3*time.Minute, 5*time.Second,
			func(ctx context.Context, cluster ClusterInfo) error {
				present, err := DaemonSetPresent(ctx, cluster, "kube-system", "k8s-app=kube-proxy")

				switch {
				case err != nil:
					return err
				case !present:
					return conditions.ErrSkipAssertion
				}

				return K8sPodReadyAssertion(ctx, cluster, "kube-system", "k8s-app=kube-proxy")
			},
		),

		// coredns pods should report ready; the assertion is skipped
		// (conditions.ErrSkipAssertion) when ReplicaSetPresent reports false
		poll("coredns to report ready", 3*time.Minute, 5*time.Second,
			func(ctx context.Context, cluster ClusterInfo) error {
				present, err := ReplicaSetPresent(ctx, cluster, "kube-system", "k8s-app=kube-dns")

				switch {
				case err != nil:
					return err
				case !present:
					return conditions.ErrSkipAssertion
				}

				return K8sPodReadyAssertion(ctx, cluster, "kube-system", "k8s-app=kube-dns")
			},
		),

		// all the nodes should be schedulable
		poll("all k8s nodes to report schedulable", 5*time.Minute, 5*time.Second,
			func(ctx context.Context, cluster ClusterInfo) error {
				return K8sAllNodesSchedulableAssertion(ctx, cluster)
			},
		),
	)
}
// ExtraClusterChecks returns a set of additional Talos cluster readiness checks which work only for newer versions of Talos.
//
// ExtraClusterChecks can't be used reliably in upgrade tests, as older versions might not pass the checks.
//
// No extra checks are defined at the moment: the returned slice is empty, but not nil.
func ExtraClusterChecks() []ClusterCheck {
	extra := make([]ClusterCheck, 0)

	return extra
}
// PreBootSequenceChecks returns a set of Talos cluster readiness checks which are run before boot sequence.
//
// Each check wraps one assertion into a polling condition; all of them are
// built with the same pair of durations (5 minutes and 5 seconds).
func PreBootSequenceChecks() []ClusterCheck {
	const (
		timeout  = 5 * time.Minute
		interval = 5 * time.Second
	)

	// steps lists, in execution order, the condition description and the
	// assertion which is polled for it.
	steps := []struct {
		description string
		assert      func(ctx context.Context, cluster ClusterInfo) error
	}{
		{
			// etcd service health, with node types limited to init and control plane
			description: "etcd to be healthy",
			assert: func(ctx context.Context, cluster ClusterInfo) error {
				return ServiceHealthAssertion(ctx, cluster, "etcd", WithNodeTypes(machine.TypeInit, machine.TypeControlPlane))
			},
		},
		{
			// etcd members should be consistent across nodes
			description: "etcd members to be consistent across nodes",
			assert: func(ctx context.Context, cluster ClusterInfo) error {
				return EtcdConsistentAssertion(ctx, cluster)
			},
		},
		{
			// etcd members should be the control plane nodes
			description: "etcd members to be control plane nodes",
			assert: func(ctx context.Context, cluster ClusterInfo) error {
				return EtcdControlPlaneNodesAssertion(ctx, cluster)
			},
		},
		{
			// apid should be ready on all the nodes
			description: "apid to be ready",
			assert: func(ctx context.Context, cluster ClusterInfo) error {
				return ApidReadyAssertion(ctx, cluster)
			},
		},
		{
			// all nodes should report their memory size
			description: "all nodes memory sizes",
			assert: func(ctx context.Context, cluster ClusterInfo) error {
				return AllNodesMemorySizes(ctx, cluster)
			},
		},
		{
			// all nodes should report their disk size
			description: "all nodes disk sizes",
			assert: func(ctx context.Context, cluster ClusterInfo) error {
				return AllNodesDiskSizes(ctx, cluster)
			},
		},
		{
			// kubelet service health, with node types limited to init and control plane
			description: "kubelet to be healthy",
			assert: func(ctx context.Context, cluster ClusterInfo) error {
				return ServiceHealthAssertion(ctx, cluster, "kubelet", WithNodeTypes(machine.TypeInit, machine.TypeControlPlane))
			},
		},
		{
			// all nodes should finish booting
			description: "all nodes to finish boot sequence",
			assert: func(ctx context.Context, cluster ClusterInfo) error {
				return AllNodesBootedAssertion(ctx, cluster)
			},
		},
	}

	checks := make([]ClusterCheck, 0, len(steps))

	for _, step := range steps {
		step := step // per-iteration copy for the closures below (required before Go 1.22)

		checks = append(checks, func(cluster ClusterInfo) conditions.Condition {
			return conditions.PollingCondition(step.description, func(ctx context.Context) error {
				return step.assert(ctx, cluster)
			}, timeout, interval)
		})
	}

	return checks
}