-
Notifications
You must be signed in to change notification settings - Fork 4.7k
/
helpers.go
588 lines (513 loc) · 23.6 KB
/
helpers.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
package helpers
import (
"context"
"fmt"
"sort"
"strings"
"time"
"github.com/davecgh/go-spew/spew"
o "github.com/onsi/gomega"
configv1 "github.com/openshift/api/config/v1"
machinev1 "github.com/openshift/api/machine/v1"
machinev1beta1 "github.com/openshift/api/machine/v1beta1"
machineclient "github.com/openshift/client-go/machine/clientset/versioned"
machinev1client "github.com/openshift/client-go/machine/clientset/versioned/typed/machine/v1"
machinev1beta1client "github.com/openshift/client-go/machine/clientset/versioned/typed/machine/v1beta1"
bmhelper "github.com/openshift/origin/test/extended/baremetal"
exutil "github.com/openshift/origin/test/extended/util"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/net"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/dynamic"
"k8s.io/client-go/kubernetes"
v1 "k8s.io/client-go/kubernetes/typed/core/v1"
e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
"k8s.io/utils/pointer"
)
// masterMachineLabelSelector selects Machine resources that back control-plane (master) nodes.
const masterMachineLabelSelector = "machine.openshift.io/cluster-api-machine-role" + "=" + "master"
// machineDeletionHookName and machineDeletionHookOwner identify the PreDrain lifecycle hook
// that must be present on master machines (checked by hasMachineDeletionHook) before a
// machine is considered fully managed by the etcd operator.
const machineDeletionHookName = "EtcdQuorumOperator"
const machineDeletionHookOwner = "clusteroperator/etcd"
// masterNodeRoleLabel selects Node resources carrying the master role.
const masterNodeRoleLabel = "node-role.kubernetes.io/master"
// TestingT is the minimal logging interface these helpers require from a test harness.
type TestingT interface {
	Logf(format string, args ...interface{})
}
// CreateNewMasterMachine creates a new master node by cloning an existing Machine resource.
// It picks the first master machine found in the "Running" phase, strips the fields that
// tie it to its current instance, and creates the copy under a "-clone" suffixed name.
// Returns the new machine's name; provisioning is asynchronous (await with EnsureMasterMachine).
func CreateNewMasterMachine(ctx context.Context, t TestingT, machineClient machinev1beta1client.MachineInterface) (string, error) {
	machineList, err := machineClient.List(ctx, metav1.ListOptions{LabelSelector: masterMachineLabelSelector})
	if err != nil {
		return "", err
	}
	var machineToClone *machinev1beta1.Machine
	for i := range machineList.Items {
		// index into the slice instead of taking the address of the range variable;
		// the original &machine aliased loop-iteration storage and only worked
		// because of the immediate break
		machine := &machineList.Items[i]
		machinePhase := pointer.StringDeref(machine.Status.Phase, "Unknown")
		if machinePhase == "Running" {
			machineToClone = machine
			break
		}
		t.Logf("%q machine is in unexpected %q state", machine.Name, machinePhase)
	}
	if machineToClone == nil {
		return "", fmt.Errorf("unable to find a running master machine to clone")
	}
	// assigning a new Name and clearing ProviderID is enough
	// for MAO to pick it up and provision a new master machine/node
	machineToClone.Name = fmt.Sprintf("%s-clone", machineToClone.Name)
	machineToClone.Spec.ProviderID = nil
	machineToClone.ResourceVersion = ""
	machineToClone.Annotations = map[string]string{}
	machineToClone.Spec.LifecycleHooks = machinev1beta1.LifecycleHooks{}
	// use the caller's ctx here; the original passed context.TODO(), ignoring cancellation
	clonedMachine, err := machineClient.Create(ctx, machineToClone, metav1.CreateOptions{})
	if err != nil {
		return "", err
	}
	t.Logf("Created a new master machine/node %q", clonedMachine.Name)
	return clonedMachine.Name, nil
}
// EnsureMasterMachine blocks until the named machine reports the "Running" phase and
// carries the required etcd deletion hook, or until the poll timeout expires.
func EnsureMasterMachine(ctx context.Context, t TestingT, machineName string, machineClient machinev1beta1client.MachineInterface) error {
	const (
		pollEvery = 15 * time.Second
		// This timeout should be tuned for the platform that takes the longest to
		// provision a node and result in a Running machine phase.
		pollFor = 25 * time.Minute
	)
	t.Logf("Waiting up to %s for %q machine to be in the Running state", pollFor.String(), machineName)
	return wait.Poll(pollEvery, pollFor, func() (bool, error) {
		m, err := machineClient.Get(ctx, machineName, metav1.GetOptions{})
		if err != nil {
			return false, err
		}
		phase := pointer.StringDeref(m.Status.Phase, "Unknown")
		t.Logf("%q machine is in %q state", machineName, phase)
		if phase != "Running" {
			return false, nil
		}
		if hasMachineDeletionHook(m) {
			return true, nil
		}
		// it takes some time to add the hook
		t.Logf("%q machine doesn't have required deletion hooks", m.Name)
		return false, nil
	})
}
// EnsureInitialClusterState makes sure the cluster state is expected, that is, has only 3 running machines and exactly 3 voting members
// otherwise it attempts to recover the cluster by removing any excessive machines
func EnsureInitialClusterState(ctx context.Context, t TestingT, etcdClientFactory EtcdClientCreator, machineClient machinev1beta1client.MachineInterface, kubeClient kubernetes.Interface) error {
	// run the recovery and verification phases in order, stopping at the first failure
	steps := []func() error{
		func() error { return recoverClusterToInitialStateIfNeeded(ctx, t, machineClient) },
		func() error { return EnsureVotingMembersCount(ctx, t, etcdClientFactory, kubeClient, 3) },
		func() error { return EnsureMasterMachinesAndCount(ctx, t, machineClient) },
	}
	for _, step := range steps {
		if err := step(); err != nil {
			return err
		}
	}
	return nil
}
// EnsureMasterMachinesAndCount checks if there are only 3 running master machines otherwise it returns an error
func EnsureMasterMachinesAndCount(ctx context.Context, t TestingT, machineClient machinev1beta1client.MachineInterface) error {
	interval, timeout := 15*time.Second, 10*time.Minute
	t.Logf("Waiting up to %s for the cluster to reach the expected machines count of 3", timeout.String())
	return wait.Poll(interval, timeout, func() (bool, error) {
		machines, err := machineClient.List(ctx, metav1.ListOptions{LabelSelector: masterMachineLabelSelector})
		if err != nil {
			return isTransientAPIError(t, err)
		}
		// wrong count: log the current set and keep polling
		if len(machines.Items) != 3 {
			names := make([]string, 0, len(machines.Items))
			for _, m := range machines.Items {
				names = append(names, m.Name)
			}
			t.Logf("expected exactly 3 master machines, got %d, machines are: %v", len(machines.Items), names)
			return false, nil
		}
		// right count: every machine must be Running and carry the deletion hook
		for i := range machines.Items {
			m := &machines.Items[i]
			phase := pointer.StringDeref(m.Status.Phase, "")
			if phase != "Running" {
				return false, fmt.Errorf("%q machine is in unexpected %q state, expected Running", m.Name, phase)
			}
			if !hasMachineDeletionHook(m) {
				return false, fmt.Errorf("%q machine doesn't have required deletion hooks", m.Name)
			}
		}
		return true, nil
	})
}
// recoverClusterToInitialStateIfNeeded deletes any "-clone" suffixed master machines left
// behind by a previous test, first stripping their lifecycle hooks so deletion can proceed.
func recoverClusterToInitialStateIfNeeded(ctx context.Context, t TestingT, machineClient machinev1beta1client.MachineInterface) error {
	interval, timeout := 15*time.Second, 5*time.Minute
	t.Logf("Trying up to %s to recover the cluster to its initial state", timeout.String())
	return wait.Poll(interval, timeout, func() (bool, error) {
		machines, err := machineClient.List(ctx, metav1.ListOptions{LabelSelector: masterMachineLabelSelector})
		if err != nil {
			return isTransientAPIError(t, err)
		}
		names := make([]string, 0, len(machines.Items))
		for _, m := range machines.Items {
			names = append(names, m.Name)
		}
		t.Logf("checking if there are any excessive machines in the cluster (created by a previous test), expected cluster size is 3, found %v machines: %v", len(machines.Items), names)
		for i := range machines.Items {
			m := machines.Items[i]
			if !strings.HasSuffix(m.Name, "-clone") {
				continue
			}
			// first forcefully remove the hooks
			m.Spec.LifecycleHooks = machinev1beta1.LifecycleHooks{}
			if _, err := machineClient.Update(ctx, &m, metav1.UpdateOptions{}); err != nil {
				return isTransientAPIError(t, err)
			}
			// then the machine
			if err := machineClient.Delete(ctx, m.Name, metav1.DeleteOptions{}); err != nil {
				return isTransientAPIError(t, err)
			}
			t.Logf("successfully deleted an excessive machine %q from the API (perhaps, created by a previous test)", m.Name)
		}
		return true, nil
	})
}
// DeleteSingleMachine deletes the master machine with the lexicographically smallest name
// and returns that name. A NotFound response from the delete call is tolerated (the machine
// was already removed); deletion itself is asynchronous.
func DeleteSingleMachine(ctx context.Context, t TestingT, machineClient machinev1beta1client.MachineInterface) (string, error) {
	// list master machines
	machineList, err := machineClient.List(ctx, metav1.ListOptions{LabelSelector: masterMachineLabelSelector})
	if err != nil {
		return "", fmt.Errorf("error listing master machines: '%w'", err)
	}
	// guard: indexing machineNames[0] below would panic on an empty list
	if len(machineList.Items) == 0 {
		return "", fmt.Errorf("no master machines found to delete")
	}
	// Machine names are suffixed with an index number (e.g "ci-op-xlbdrkvl-6a467-qcbkh-master-0")
	// so we sort to pick the lowest index, e.g master-0 in this example
	machineNames := make([]string, 0, len(machineList.Items))
	for _, m := range machineList.Items {
		machineNames = append(machineNames, m.Name)
	}
	sort.Strings(machineNames)
	machineToDelete := machineNames[0]
	t.Logf("attempting to delete machine '%q'", machineToDelete)
	if err := machineClient.Delete(ctx, machineToDelete, metav1.DeleteOptions{}); err != nil {
		if apierrors.IsNotFound(err) {
			t.Logf("machine '%q' was listed but not found or already deleted", machineToDelete)
			return "", nil
		}
		return "", err
	}
	t.Logf("successfully deleted machine '%q'", machineToDelete)
	return machineToDelete, nil
}
// IsCPMSActive returns true if the current platform's has an active CPMS
// Not all platforms are supported (as of 4.12 only AWS and Azure)
// See https://github.com/openshift/cluster-control-plane-machine-set-operator/tree/main/docs/user#supported-platforms
func IsCPMSActive(ctx context.Context, t TestingT, cpmsClient machinev1client.ControlPlaneMachineSetInterface) (bool, error) {
	// The CPMS singleton in the "openshift-machine-api" namespace is named "cluster"
	// https://github.com/openshift/cluster-control-plane-machine-set-operator/blob/bba395abab62fc12de4a9b9b030700546f4b822e/pkg/controllers/controlplanemachineset/controller.go#L50-L53
	cpms, err := cpmsClient.Get(ctx, "cluster", metav1.GetOptions{})
	switch {
	case apierrors.IsNotFound(err):
		// no CPMS resource at all -> treated as inactive, not an error
		return false, nil
	case err != nil:
		return false, err
	}
	// The CPMS state must be active in order for the platform to be supported
	// See https://github.com/openshift/cluster-control-plane-machine-set-operator/blob/7961d1457c6aef26d3b1dafae962da2a2aba18ef/docs/user/installation.md#anatomy-of-a-controlplanemachineset
	return cpms.Spec.State == machinev1.ControlPlaneMachineSetStateActive, nil
}
// EnsureReadyReplicasOnCPMS checks if status.readyReplicas on the cluster CPMS is n
// this effectively counts the number of control-plane machines with the provider state as running.
// It additionally verifies (via EnsureReadyMasterNodes) that the expected number of master
// nodes exists and all of them are Ready.
func EnsureReadyReplicasOnCPMS(ctx context.Context, t TestingT, expectedReplicaCount int, cpmsClient machinev1client.ControlPlaneMachineSetInterface, nodeClient v1.NodeInterface) error {
	waitPollInterval := 5 * time.Second
	waitPollTimeout := 18 * time.Minute
	t.Logf("Waiting up to %s for the CPMS to have status.readyReplicas = %v", waitPollTimeout.String(), expectedReplicaCount)
	return wait.Poll(waitPollInterval, waitPollTimeout, func() (bool, error) {
		cpms, err := cpmsClient.Get(ctx, "cluster", metav1.GetOptions{})
		if err != nil {
			return isTransientAPIError(t, err)
		}
		if cpms.Status.ReadyReplicas != int32(expectedReplicaCount) {
			t.Logf("expected %d ready replicas on CPMS, got: %v,", expectedReplicaCount, cpms.Status.ReadyReplicas)
			return false, nil
		}
		t.Logf("CPMS has reached the desired number of ready replicas: %v,", cpms.Status.ReadyReplicas)
		// note: %v, not %w — Logf is not an error-wrapping formatter; the original
		// used '%w' which renders as "%!w(...)" in the log output
		if err := EnsureReadyMasterNodes(ctx, expectedReplicaCount, nodeClient); err != nil {
			t.Logf("expected number of master nodes is not ready yet: '%v'", err)
			return false, nil
		}
		return true, nil
	})
}
// EnsureReadyMasterNodes checks if the current master nodes matches the expected number of master nodes,
// and that all master nodes' are Ready
func EnsureReadyMasterNodes(ctx context.Context, expectedReplicaCount int, nodeClient v1.NodeInterface) error {
	masterNodes, err := nodeClient.List(ctx, metav1.ListOptions{LabelSelector: masterNodeRoleLabel})
	if err != nil {
		return fmt.Errorf("failed to list master nodes:'%w'", err)
	}
	if len(masterNodes.Items) != expectedReplicaCount {
		return fmt.Errorf("expected number of master nodes is '%d', but got '%d' instead", expectedReplicaCount, len(masterNodes.Items))
	}
	for _, node := range masterNodes.Items {
		for _, condition := range node.Status.Conditions {
			if condition.Type == corev1.NodeReady && condition.Status != corev1.ConditionTrue {
				// print just the node name; the original dumped the whole Node
				// object into the error message, drowning out the signal
				return fmt.Errorf("master node '%v' is not ready", node.Name)
			}
		}
	}
	return nil
}
// EnsureCPMSReplicasConverged returns error if the number of expected master machines not equals the number of actual master machines
// otherwise it returns nil
func EnsureCPMSReplicasConverged(ctx context.Context, cpmsClient machinev1client.ControlPlaneMachineSetInterface) error {
	cpms, err := cpmsClient.Get(ctx, "cluster", metav1.GetOptions{})
	if err != nil {
		return fmt.Errorf("failed to get controlPlaneMachineSet object: '%w'", err)
	}
	// Spec.Replicas is a *int32; guard against a nil pointer before dereferencing
	if cpms.Spec.Replicas == nil {
		return fmt.Errorf("CPMS spec.replicas is unset")
	}
	if *cpms.Spec.Replicas != cpms.Status.ReadyReplicas {
		// dereference the pointer in the message: the original passed the pointer
		// itself to %v, which prints a memory address instead of the count
		return fmt.Errorf("CPMS replicas failed to converge, expected status.readyReplicas '%d' to be equal to spec.replicas '%v'", cpms.Status.ReadyReplicas, *cpms.Spec.Replicas)
	}
	return nil
}
// EnsureVotingMembersCount counts the number of voting etcd members, it doesn't evaluate health conditions or any other attributes (i.e. name) of individual members
// this method won't fail immediately on errors, this is useful during scaling down operation until the feature can ensure this operation to be graceful
func EnsureVotingMembersCount(ctx context.Context, t TestingT, etcdClientFactory EtcdClientCreator, kubeClient kubernetes.Interface, expectedMembersCount int) error {
	waitPollInterval := 15 * time.Second
	waitPollTimeout := 10 * time.Minute
	t.Logf("Waiting up to %s for the cluster to reach the expected member count of %v", waitPollTimeout.String(), expectedMembersCount)
	return wait.Poll(waitPollInterval, waitPollTimeout, func() (bool, error) {
		// fresh client per attempt; closeFn tears the connection down when this attempt returns
		etcdClient, closeFn, err := etcdClientFactory.NewEtcdClient()
		if err != nil {
			t.Logf("failed to get etcd client, will retry, err: %v", err)
			return false, nil
		}
		defer closeFn()
		// shadow ctx so both the member list and the config map read below are capped at 15s per attempt
		ctx, cancel := context.WithTimeout(ctx, 15*time.Second)
		defer cancel()
		memberList, err := etcdClient.MemberList(ctx)
		if err != nil {
			t.Logf("failed to get the member list, will retry, err: %v", err)
			return false, nil
		}
		// count only non-learner (voting) members
		var votingMemberNames []string
		for _, member := range memberList.Members {
			if !member.IsLearner {
				votingMemberNames = append(votingMemberNames, member.Name)
			}
		}
		if len(votingMemberNames) != expectedMembersCount {
			t.Logf("unexpected number of voting etcd members, expected exactly %d, got: %v, current members are: %v", expectedMembersCount, len(votingMemberNames), votingMemberNames)
			return false, nil
		}
		t.Logf("cluster has reached the expected number of %v voting members, the members are: %v", expectedMembersCount, votingMemberNames)
		// cross-check against the etcd-endpoints config map, which must agree with the member list
		t.Logf("ensuring that the openshift-etcd/etcd-endpoints cm has the expected number of %v voting members", expectedMembersCount)
		etcdEndpointsConfigMap, err := kubeClient.CoreV1().ConfigMaps("openshift-etcd").Get(ctx, "etcd-endpoints", metav1.GetOptions{})
		if err != nil {
			// unlike the etcd-side failures above, this error aborts the poll
			return false, err
		}
		// the config map data values hold member IPs; collect them as a set to count distinct entries
		currentVotingMemberIPListSet := sets.NewString()
		for _, votingMemberIP := range etcdEndpointsConfigMap.Data {
			currentVotingMemberIPListSet.Insert(votingMemberIP)
		}
		if currentVotingMemberIPListSet.Len() != expectedMembersCount {
			t.Logf("unexpected number of voting members in the openshift-etcd/etcd-endpoints cm, expected exactly %d, got: %v, current members are: %v", expectedMembersCount, currentVotingMemberIPListSet.Len(), currentVotingMemberIPListSet.List())
			return false, nil
		}
		return true, nil
	})
}
// EnsureMemberRemoved polls the etcd member list until no member named memberName remains,
// returning an error if the member is still present when found.
func EnsureMemberRemoved(t TestingT, etcdClientFactory EtcdClientCreator, memberName string) error {
	interval, timeout := 15*time.Second, 1*time.Minute
	t.Logf("Waiting up to %s for %v member to be removed from the cluster", timeout.String(), memberName)
	return wait.Poll(interval, timeout, func() (bool, error) {
		client, closeFn, err := etcdClientFactory.NewEtcdClient()
		if err != nil {
			t.Logf("failed to get etcd client, will retry, err: %v", err)
			return false, nil
		}
		defer closeFn()
		listCtx, cancel := context.WithTimeout(context.TODO(), 15*time.Second)
		defer cancel()
		resp, err := client.MemberList(listCtx)
		if err != nil {
			t.Logf("failed to get member list, will retry, err: %v", err)
			return false, nil
		}
		for _, m := range resp.Members {
			if m.Name == memberName {
				return false, fmt.Errorf("member %v hasn't been removed", spew.Sdump(m))
			}
		}
		return true, nil
	})
}
// EnsureHealthyMember checks that the named etcd member answers a read request,
// returning an error when the health probe fails.
func EnsureHealthyMember(t TestingT, etcdClientFactory EtcdClientCreator, memberName string) error {
	client, closeFn, err := etcdClientFactory.NewEtcdClientForMember(memberName)
	if err != nil {
		return err
	}
	defer closeFn()
	readCtx, cancel := context.WithTimeout(context.TODO(), 15*time.Second)
	defer cancel()
	// We know it's a voting member so a linearized read is fine
	if _, err = client.Get(readCtx, "health"); err != nil {
		return fmt.Errorf("failed to check healthiness condition of the %q member, err: %v", memberName, err)
	}
	t.Logf("successfully evaluated health condition of %q member", memberName)
	return nil
}
// MachineNameToEtcdMemberName finds an etcd member name that corresponds to the given machine name
// first it looks up a node that corresponds to the machine by comparing the ProviderID field
// next, it returns the node name as it is used to name an etcd member.
//
// # In cases the ProviderID is empty it will try to find a node that matches an internal IP address
//
// note:
// it will exit and report an error in case the node was not found
func MachineNameToEtcdMemberName(ctx context.Context, kubeClient kubernetes.Interface, machineClient machinev1beta1client.MachineInterface, machineName string) (string, error) {
	machine, err := machineClient.Get(ctx, machineName, metav1.GetOptions{})
	if err != nil {
		return "", err
	}
	masterNodes, err := kubeClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{LabelSelector: masterNodeRoleLabel})
	if err != nil {
		return "", err
	}
	machineProviderID := pointer.StringDeref(machine.Spec.ProviderID, "")
	if len(machineProviderID) != 0 {
		// case 1: find corresponding node, match on providerID
		var nodeNames []string
		for _, masterNode := range masterNodes.Items {
			if masterNode.Spec.ProviderID == machineProviderID {
				return masterNode.Name, nil
			}
			nodeNames = append(nodeNames, masterNode.Name)
		}
		return "", fmt.Errorf("unable to find a node for the corresponding %q machine on ProviderID: %v, checked: %v", machineName, machineProviderID, nodeNames)
	}
	// case 2: match on an internal ip address
	machineIPListSet := sets.NewString()
	for _, addr := range machine.Status.Addresses {
		if addr.Type == corev1.NodeInternalIP {
			machineIPListSet.Insert(addr.Address)
		}
	}
	var nodeNames []string
	for _, masterNode := range masterNodes.Items {
		// record each checked node once; the original appended inside the address
		// loop, listing a node once per address in the final error message
		nodeNames = append(nodeNames, masterNode.Name)
		for _, addr := range masterNode.Status.Addresses {
			if addr.Type == corev1.NodeInternalIP && machineIPListSet.Has(addr.Address) {
				return masterNode.Name, nil
			}
		}
	}
	return "", fmt.Errorf("unable to find a node for the corresponding %q machine on the following machine's IPs: %v, checked: %v", machineName, machineIPListSet.List(), nodeNames)
}
// InitPlatformSpecificConfiguration performs per-platform setup needed before the tests run
// and returns a cleanup function for the caller to invoke afterwards.
func InitPlatformSpecificConfiguration(oc *exutil.CLI) func() {
	SkipIfUnsupportedPlatform(context.TODO(), oc)
	infra, err := oc.AdminConfigClient().ConfigV1().Infrastructures().Get(context.Background(), "cluster", metav1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())
	// For baremetal platforms, an extra worker must be previously deployed to allow subsequent scaling operations
	if infra.Status.PlatformStatus.Type != configv1.BareMetalPlatformType {
		return func() { /*noop*/ }
	}
	dc, err := dynamic.NewForConfig(oc.KubeFramework().ClientConfig())
	o.Expect(err).NotTo(o.HaveOccurred())
	helper := bmhelper.NewBaremetalTestHelper(dc)
	if helper.CanDeployExtraWorkers() {
		helper.Setup()
		helper.DeployExtraWorker(0)
	}
	return helper.DeleteAllExtraWorkers
}
// SkipIfUnsupportedPlatform skips the current test unless the cluster has a functional
// Machine API and is neither single-node nor bare metal.
func SkipIfUnsupportedPlatform(ctx context.Context, oc *exutil.CLI) {
	clientSet, err := machineclient.NewForConfig(oc.KubeFramework().ClientConfig())
	o.Expect(err).ToNot(o.HaveOccurred())
	masterMachines := clientSet.MachineV1beta1().Machines("openshift-machine-api")
	skipUnlessFunctionalMachineAPI(ctx, masterMachines)
	skipIfSingleNode(oc)
	skipIfBareMetal(oc)
}
// skipUnlessFunctionalMachineAPI skips the current test unless the cluster exposes machine
// resources and at least one master machine is in the Running phase.
func skipUnlessFunctionalMachineAPI(ctx context.Context, machineClient machinev1beta1client.MachineInterface) {
	machines, err := machineClient.List(ctx, metav1.ListOptions{LabelSelector: masterMachineLabelSelector})
	// the machine API can be unavailable resulting in a 404 or an empty list
	if err != nil {
		if !apierrors.IsNotFound(err) {
			o.Expect(err).ToNot(o.HaveOccurred())
		}
		e2eskipper.Skipf("haven't found machines resources on the cluster, this test can be run on a platform that supports functional MachineAPI")
		return
	}
	if len(machines.Items) == 0 {
		e2eskipper.Skipf("got an empty list of machines resources from the cluster, this test can be run on a platform that supports functional MachineAPI")
		return
	}
	// we expect just a single machine to be in the Running state
	for _, machine := range machines.Items {
		phase := pointer.StringDeref(machine.Status.Phase, "")
		if phase == "Running" {
			return
		}
	}
	// note: the redundant bare `return` that followed this call was removed (staticcheck S1023)
	e2eskipper.Skipf("haven't found a machine in running state, this test can be run on a platform that supports functional MachineAPI")
}
// skipIfAzure skips the current test when the cluster platform is Azure.
func skipIfAzure(oc *exutil.CLI) {
	infrastructure, err := oc.AdminConfigClient().ConfigV1().Infrastructures().Get(context.Background(), "cluster", metav1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())
	if infrastructure.Status.PlatformStatus.Type != configv1.AzurePlatformType {
		return
	}
	e2eskipper.Skipf("this test is currently flaky on the azure platform")
}
// skipIfSingleNode skips the current test on single-replica (SNO) control-plane topologies.
func skipIfSingleNode(oc *exutil.CLI) {
	infrastructure, err := oc.AdminConfigClient().ConfigV1().Infrastructures().Get(context.Background(), "cluster", metav1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())
	if infrastructure.Status.ControlPlaneTopology != configv1.SingleReplicaTopologyMode {
		return
	}
	e2eskipper.Skipf("this test can be run only against an HA cluster, skipping it on an SNO env")
}
// skipIfBareMetal skips the current test when the cluster platform is bare metal.
func skipIfBareMetal(oc *exutil.CLI) {
	infrastructure, err := oc.AdminConfigClient().ConfigV1().Infrastructures().Get(context.Background(), "cluster", metav1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())
	if infrastructure.Status.PlatformStatus.Type != configv1.BareMetalPlatformType {
		return
	}
	e2eskipper.Skipf("this test is currently broken on the metal platform and needs to be fixed")
}
// skipIfVsphere skips the current test when the cluster platform is vSphere.
func skipIfVsphere(oc *exutil.CLI) {
	infrastructure, err := oc.AdminConfigClient().ConfigV1().Infrastructures().Get(context.Background(), "cluster", metav1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())
	if infrastructure.Status.PlatformStatus.Type != configv1.VSpherePlatformType {
		return
	}
	e2eskipper.Skipf("this test is currently broken on the vsphere platform and needs to be fixed (BZ2094919)")
}
// hasMachineDeletionHook reports whether the machine carries the PreDrain lifecycle hook
// identified by machineDeletionHookName/machineDeletionHookOwner.
func hasMachineDeletionHook(machine *machinev1beta1.Machine) bool {
	for _, h := range machine.Spec.LifecycleHooks.PreDrain {
		if h.Name != machineDeletionHookName {
			continue
		}
		if h.Owner == machineDeletionHookOwner {
			return true
		}
	}
	return false
}
// transientAPIError returns true if the provided error indicates that a retry against an HA server has a good chance to succeed.
func transientAPIError(err error) bool {
	if err == nil {
		return false
	}
	return net.IsProbableEOF(err) ||
		net.IsConnectionReset(err) ||
		net.IsNoRoutesError(err) ||
		isClientConnectionLost(err)
}
// isTransientAPIError adapts transientAPIError to the (bool, error) contract used by
// wait.Poll condition funcs: transient errors are logged and retried (false, nil),
// everything else aborts the poll (false, err).
func isTransientAPIError(t TestingT, err error) (bool, error) {
	// we tolerate some disruption until https://bugzilla.redhat.com/show_bug.cgi?id=2082778
	// is fixed and rely on the monitor for reporting (p99).
	// this is okay since we observe disruption during the upgrade jobs too,
	// the only difference is that during the upgrade job we don't access the API except from the monitor.
	if !transientAPIError(err) {
		return false, err
	}
	t.Logf("ignoring %v for now, the error is considered a transient error (will retry)", err)
	return false, nil
}
func isClientConnectionLost(err error) bool {
return strings.Contains(err.Error(), "client connection lost")
}