topology_operation_strom_cases.go
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2e
import (
"context"
"fmt"
"sync"
"time"
ginkgo "github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
"github.com/vmware/govmomi/object"
vimtypes "github.com/vmware/govmomi/vim25/types"
"golang.org/x/crypto/ssh"
v1 "k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/framework"
fnodes "k8s.io/kubernetes/test/e2e/framework/node"
fpod "k8s.io/kubernetes/test/e2e/framework/pod"
fpv "k8s.io/kubernetes/test/e2e/framework/pv"
fss "k8s.io/kubernetes/test/e2e/framework/statefulset"
admissionapi "k8s.io/pod-security-admission/api"
)
var _ = ginkgo.Describe("[csi-topology-operation-strom-level5] "+
"Topology-Provisioning-With-OperationStrom-Cases", func() {
f := framework.NewDefaultFramework("e2e-vsphere-topology-aware-provisioning")
f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged
var (
client clientset.Interface
namespace string
bindingMode storagev1.VolumeBindingMode
allowedTopologies []v1.TopologySelectorLabelRequirement
topologyAffinityDetails map[string][]string
topologyCategories []string
sts_count int
statefulSetReplicaCount int32
nodeList *v1.NodeList
err error
topologyClusterList []string
powerOffHostsList []string
sshClientConfig *ssh.ClientConfig
k8sVersion string
nimbusGeneratedK8sVmPwd string
)
ginkgo.BeforeEach(func() {
client = f.ClientSet
namespace = f.Namespace.Name
var cancel context.CancelFunc
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
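// bootstrap() performs the suite's common vSphere test-bed setup (connection and
// config) that the helpers used below rely on.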
bootstrap()
nodeList, err = fnodes.GetReadySchedulableNodes(f.ClientSet)
framework.ExpectNoError(err, "Unable to find ready and schedulable Node")
if len(nodeList.Items) == 0 {
framework.Failf("Unable to find ready and schedulable Node")
}
sc, err := client.StorageV1().StorageClasses().Get(ctx, defaultNginxStorageClassName, metav1.GetOptions{})
if err == nil && sc != nil {
gomega.Expect(client.StorageV1().StorageClasses().Delete(ctx, sc.Name,
*metav1.NewDeleteOptions(0))).NotTo(gomega.HaveOccurred())
}
// fetching k8s version
v, err := client.Discovery().ServerVersion()
gomega.Expect(err).NotTo(gomega.HaveOccurred())
k8sVersion = v.Major + "." + v.Minor
bindingMode = storagev1.VolumeBindingWaitForFirstConsumer
topologyMap := GetAndExpectStringEnvVar(topologyMap)
topologyAffinityDetails, topologyCategories = createTopologyMapLevel5(topologyMap, topologyLength)
allowedTopologies = createAllowedTopolgies(topologyMap, topologyLength)
topologyClusterNames := GetAndExpectStringEnvVar(topologyCluster)
topologyClusterList = ListTopologyClusterNames(topologyClusterNames)
readVcEsxIpsViaTestbedInfoJson(GetAndExpectStringEnvVar(envTestbedInfoJsonPath))
nimbusGeneratedK8sVmPwd = GetAndExpectStringEnvVar(nimbusK8sVmPwd)
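// SSH client config (root login with the nimbus-generated password) used later to
// reach the k8s master node and kill CSI sidecar containers mid-operation.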
sshClientConfig = &ssh.ClientConfig{
User: "root",
Auth: []ssh.AuthMethod{
ssh.Password(nimbusGeneratedK8sVmPwd),
},
HostKeyCallback: ssh.InsecureIgnoreHostKey(),
}
})
ginkgo.AfterEach(func() {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
ginkgo.By(fmt.Sprintf("Deleting all statefulsets in namespace: %v", namespace))
fss.DeleteAllStatefulSets(client, namespace)
ginkgo.By(fmt.Sprintf("Deleting service nginx in namespace: %v", namespace))
err := client.CoreV1().Services(namespace).Delete(ctx, servicename, *metav1.NewDeleteOptions(0))
if !apierrors.IsNotFound(err) {
gomega.Expect(err).NotTo(gomega.HaveOccurred())
}
})
/*
TESTCASE-1
Steps:
1. Create SC with volumeBindingMode as WaitForFirstConsumer.
2. Using the above storage class, create 3 statefulsets with parallel pod management
policy, each with 20 replicas and NodeSelector terms set to all topology levels.
3. Wait for the statefulset Pods to be created and reach the Running state.
4. Once the Pods are running, verify that they are created on all topology levels as specified in
the nodeAffinity entry.
5. Describe the PVs and verify the node affinity details.
6. Bring down a few ESXi hosts in zone3.
7. Wait for some time and verify that the nodes in zone3 come up on different ESXi hosts in
the same zone3 and the respective Pods are back in the Running state.
8. Scale up the first statefulset to 35 replicas and scale down the second statefulset to 5 replicas.
9. Bring up zone3.
10. Wait for the k8s cluster to be healthy.
11. Verify the first statefulset is up with 35 replicas and the second statefulset is
scaled down to 5 successfully.
12. Bring down a few ESXi hosts in zone2.
13. Wait for some time and verify that the nodes in zone2 come up on different ESXi hosts in
the same zone2 and the respective Pods are back in the Running state.
14. Scale down the first statefulset to 10 replicas.
15. Scale up the second statefulset to 35 replicas.
16. Wait for some time for the new Pods to get created.
17. Verify that the node affinity details on the PVs are proper.
18. Bring up the ESXi hosts in zone2.
19. Check the PVC CNS metadata details.
20. Delete all statefulsets.
21. Delete the PVCs and SC.
*/
ginkgo.It("Volume provisioning when multiple statefulsets creation is in "+
"progress and in between zones hosts are down", ginkgo.Label(p1, block,
vanilla, level5, flaky, disruptive), func() {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
sts_count = 3
statefulSetReplicaCount = 20
var ssPods *v1.PodList
// get cluster details
clusterComputeResource, _, err = getClusterName(ctx, &e2eVSphere)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
// Get allowed topologies for Storage Class
allowedTopologyForSC := getTopologySelector(topologyAffinityDetails, topologyCategories,
topologyLength)
// Create SC with WFC BindingMode
ginkgo.By("Creating Storage Class with WFC Binding Mode and allowed topolgies of 5 levels")
storageclass, err := createStorageClass(client, nil, allowedTopologyForSC, "",
bindingMode, false, "nginx-sc")
gomega.Expect(err).NotTo(gomega.HaveOccurred())
defer func() {
err := client.StorageV1().StorageClasses().Delete(ctx, storageclass.Name, *metav1.NewDeleteOptions(0))
gomega.Expect(err).NotTo(gomega.HaveOccurred())
}()
// Creating StatefulSet service
ginkgo.By("Creating service")
service := CreateService(namespace, client)
defer func() {
deleteService(namespace, client, service)
}()
// Create multiple StatefulSets Specs in parallel
ginkgo.By("Creating multiple StatefulSets specs in parallel")
statefulSets := createParallelStatefulSetSpec(namespace, sts_count, statefulSetReplicaCount)
// Trigger multiple StatefulSets creation in parallel.
ginkgo.By("Trigger multiple StatefulSets creation in parallel")
var wg sync.WaitGroup
wg.Add(sts_count)
for i := 0; i < len(statefulSets); i++ {
go createParallelStatefulSets(client, namespace, statefulSets[i],
statefulSetReplicaCount, &wg)
}
wg.Wait()
defer func() {
ginkgo.By(fmt.Sprintf("Deleting all statefulsets in namespace: %v", namespace))
fss.DeleteAllStatefulSets(client, namespace)
}()
ginkgo.By("Waiting for StatefulSets Pods to be in Ready State")
err = waitForStsPodsToBeInReadyRunningState(ctx, client, namespace, statefulSets)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
/* Verify PV node affinity and that the pods are running on appropriate nodes
for each StatefulSet pod */
ginkgo.By("Verify PV node affinity and that the PODS are running on appropriate node")
for i := 0; i < len(statefulSets); i++ {
err = verifyPVnodeAffinityAndPODnodedetailsForStatefulsetsLevel5(ctx, client,
statefulSets[i], namespace, allowedTopologies, true, false)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
}
// Bring down a few ESXi hosts that belong to zone3
ginkgo.By("Bring down a few ESXi hosts that belong to zone3")
hostsInCluster := getHostsByClusterName(ctx, clusterComputeResource, topologyClusterList[2])
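// Power off all but two ESXi hosts in the cluster backing zone3, so the zone stays
// partially available and its nodes can fail over to the remaining hosts.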
powerOffHostsList = powerOffEsxiHostByCluster(ctx, &e2eVSphere, topologyClusterList[2],
(len(hostsInCluster) - 2))
defer func() {
ginkgo.By("Bring up all ESXi host which were powered off in zone3")
for i := 0; i < len(powerOffHostsList); i++ {
powerOnEsxiHostByCluster(powerOffHostsList[i])
}
}()
ginkgo.By("Wait for k8s cluster to be healthy")
wait4AllK8sNodesToBeUp(ctx, client, nodeList)
err = waitForAllNodes2BeReady(ctx, client, pollTimeout*4)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
// Verify all the workload Pods are in up and running state
ginkgo.By("Verify all the workload Pods are in up and running state")
ssPods = fss.GetPodList(client, statefulSets[1])
for _, pod := range ssPods.Items {
err := fpod.WaitForPodRunningInNamespace(client, &pod)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
}
// Scale up statefulSets replicas count
ginkgo.By("Scale up statefulset replica and verify the replica count")
statefulSetReplicaCount = 35
err = scaleUpStatefulSetPod(ctx, client, statefulSets[0], namespace, statefulSetReplicaCount,
true, false)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
ssPodsAfterScaleDown := GetListOfPodsInSts(client, statefulSets[0])
gomega.Expect(len(ssPodsAfterScaleDown.Items) == int(statefulSetReplicaCount)).To(gomega.BeTrue(),
"Number of Pods in the statefulset should match with number of replicas")
// Scale down statefulSets replica count
statefulSetReplicaCount = 5
ginkgo.By("Scale down statefulset replica and verify the replica count")
err = scaleDownStatefulSetPod(ctx, client, statefulSets[1], namespace, statefulSetReplicaCount, true, false)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
ssPodsAfterScaleDown = GetListOfPodsInSts(client, statefulSets[1])
gomega.Expect(len(ssPodsAfterScaleDown.Items) == int(statefulSetReplicaCount)).To(gomega.BeTrue(),
"Number of Pods in the statefulset should match with number of replicas")
// Bring up all esxi hosts which were powered off in zone3
ginkgo.By("Bring up all ESXi host which were powered off in zone3")
for i := 0; i < len(powerOffHostsList); i++ {
powerOnEsxiHostByCluster(powerOffHostsList[i])
}
ginkgo.By("Wait for k8s cluster to be healthy")
wait4AllK8sNodesToBeUp(ctx, client, nodeList)
err = waitForAllNodes2BeReady(ctx, client, pollTimeout*4)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
// Verify all the workload Pods are in up and running state
ginkgo.By("Verify all the workload Pods are in up and running state")
ssPods = fss.GetPodList(client, statefulSets[1])
for _, pod := range ssPods.Items {
err := fpod.WaitForPodRunningInNamespace(client, &pod)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
}
// Bring down a few ESXi hosts that belong to zone2
ginkgo.By("Bring down a few ESXi hosts that belong to zone2")
hostsInCluster = getHostsByClusterName(ctx, clusterComputeResource, topologyClusterList[1])
powerOffHostsList = powerOffEsxiHostByCluster(ctx, &e2eVSphere, topologyClusterList[1],
(len(hostsInCluster) - 2))
defer func() {
ginkgo.By("Bring up all ESXi host which were powered off in zone2")
for i := 0; i < len(powerOffHostsList); i++ {
powerOnEsxiHostByCluster(powerOffHostsList[i])
}
}()
ginkgo.By("Wait for k8s cluster to be healthy")
wait4AllK8sNodesToBeUp(ctx, client, nodeList)
err = waitForAllNodes2BeReady(ctx, client, pollTimeout*4)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
// Scale down statefulSets replicas count
statefulSetReplicaCount = 10
ginkgo.By("Scale down statefulset replica and verify the replica count")
err = scaleDownStatefulSetPod(ctx, client, statefulSets[0], namespace, statefulSetReplicaCount, true, false)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
ssPodsAfterScaleDown = GetListOfPodsInSts(client, statefulSets[0])
gomega.Expect(len(ssPodsAfterScaleDown.Items) == int(statefulSetReplicaCount)).To(gomega.BeTrue(),
"Number of Pods in the statefulset should match with number of replicas")
// Scale up statefulSets replica count
statefulSetReplicaCount = 35
ginkgo.By("Scale up statefulset replica and verify the replica count")
err = scaleUpStatefulSetPod(ctx, client, statefulSets[1], namespace, statefulSetReplicaCount,
true, false)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
ssPodsAfterScaleDown = GetListOfPodsInSts(client, statefulSets[1])
gomega.Expect(len(ssPodsAfterScaleDown.Items) == int(statefulSetReplicaCount)).To(gomega.BeTrue(),
"Number of Pods in the statefulset should match with number of replicas")
/* Verify PV node affinity and that the pods are running on appropriate nodes
for each StatefulSet pod */
ginkgo.By("Verify PV node affinity and that the PODS are running on appropriate node")
for i := 0; i < len(statefulSets); i++ {
err = verifyPVnodeAffinityAndPODnodedetailsForStatefulsetsLevel5(ctx, client,
statefulSets[i], namespace, allowedTopologies, true, false)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
}
// Bring up all ESXi hosts which were powered off in zone2
ginkgo.By("Bring up all ESXi hosts which were powered off in zone2")
for i := 0; i < len(powerOffHostsList); i++ {
powerOnEsxiHostByCluster(powerOffHostsList[i])
}
ginkgo.By("Wait for k8s cluster to be healthy")
wait4AllK8sNodesToBeUp(ctx, client, nodeList)
err = waitForAllNodes2BeReady(ctx, client, pollTimeout*4)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
// Verify all the workload Pods are in up and running state
ginkgo.By("Verify all the workload Pods are in up and running state")
ssPods = fss.GetPodList(client, statefulSets[1])
for _, pod := range ssPods.Items {
err := fpod.WaitForPodRunningInNamespace(client, &pod)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
}
// verifyVolumeMetadataInCNS
ginkgo.By("Verify pod entry in CNS volume-metadata for the volumes associated with the PVC")
ssPodsBeforeScaleDown := fss.GetPodList(client, statefulSets[1])
for _, pod := range ssPodsBeforeScaleDown.Items {
_, err := client.CoreV1().Pods(namespace).Get(ctx, pod.Name, metav1.GetOptions{})
gomega.Expect(err).NotTo(gomega.HaveOccurred())
for _, volumespec := range pod.Spec.Volumes {
if volumespec.PersistentVolumeClaim != nil {
pv := getPvFromClaim(client, pod.Namespace, volumespec.PersistentVolumeClaim.ClaimName)
// Verify the attached volume match the one in CNS cache
err := verifyVolumeMetadataInCNS(&e2eVSphere, pv.Spec.CSI.VolumeHandle,
volumespec.PersistentVolumeClaim.ClaimName, pv.ObjectMeta.Name, pod.Name)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
}
}
}
// Scale down statefulSets replica count
statefulSetReplicaCount = 0
ginkgo.By("Scale down statefulset replica count")
for i := 0; i < len(statefulSets); i++ {
err = scaleDownStatefulSetPod(ctx, client, statefulSets[i], namespace, statefulSetReplicaCount, true, false)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
ssPodsAfterScaleDown := GetListOfPodsInSts(client, statefulSets[i])
gomega.Expect(len(ssPodsAfterScaleDown.Items) == int(statefulSetReplicaCount)).To(gomega.BeTrue(),
"Number of Pods in the statefulset should match with number of replicas")
}
defer func() {
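// Clean up any PVCs left behind by the statefulsets and verify that the
// backing volumes are removed from CNS as well.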
pvcs, err := client.CoreV1().PersistentVolumeClaims(namespace).List(ctx, metav1.ListOptions{})
gomega.Expect(err).NotTo(gomega.HaveOccurred())
for _, claim := range pvcs.Items {
pv := getPvFromClaim(client, namespace, claim.Name)
err := fpv.DeletePersistentVolumeClaim(client, claim.Name, namespace)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
ginkgo.By("Verify it's PV and corresponding volumes are deleted from CNS")
err = fpv.WaitForPersistentVolumeDeleted(client, pv.Name, poll,
pollTimeout)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
volumeHandle := pv.Spec.CSI.VolumeHandle
err = e2eVSphere.waitForCNSVolumeToBeDeleted(volumeHandle)
gomega.Expect(err).NotTo(gomega.HaveOccurred(),
fmt.Sprintf("Volume: %s should not be present in the CNS after it is deleted from "+
"kubernetes", volumeHandle))
}
}()
})
/*
TESTCASE-2
Kill one of the CSI containers + bring down ESXi hosts of zone2
Steps:
1. Identify the Pod where CSI Provisioner is the leader.
2. Create a storage class with volumeBindingMode WaitForFirstConsumer.
3. Bring down all ESXi hosts in zone2.
4. Using the above SC, create 3 statefulsets with parallel pod management policy,
each with 10 replicas.
5. While the statefulsets are creating PVCs and Pods, kill the CSI Provisioner container
identified in step 1, where CSI Provisioner is the leader.
6. The csi-provisioner in another replica should take over leadership and help provision the volumes.
7. Bring up the ESXi hosts in zone2.
8. Expect all PVCs for the statefulsets to be in the Bound state.
9. Expect all Pods for the statefulsets to be in the Running state.
10. Describe the PVs and verify the node affinity rule. Make sure the node affinity
contains all 5 levels of topology details.
11. Pods should be running on the appropriate nodes.
12. Identify the Pod where CSI Attacher is the leader.
13. Scale down the statefulsets replica count to 5.
14. While the statefulsets are scaling down Pods, kill the CSI Attacher container
identified in step 12, where CSI Attacher is the leader.
15. Wait until the Pod count goes down to 5.
16. Scale up the replica count to 20.
17. PVCs and Pods should come up on all the topology levels specified.
18. Verify the node affinity details on the PVs are proper with all 5 level details.
19. Delete the statefulsets.
20. Delete the PVCs and SC.
*/
ginkgo.It("Volume provisioning when multiple statefulsets creation in progress and in "+
"between zones hosts and container nodes are down", ginkgo.Label(p1, block,
vanilla, level5, flaky, disruptive), func() {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
sts_count = 3
statefulSetReplicaCount = 10
var ssPods *v1.PodList
containerName := provisionerContainerName
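// notReadyNodes collects worker nodes that are still NotReady after the powered-off
// ESXi hosts come back, so that their VMs can be powered on explicitly via govmomi.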
var notReadyNodes []v1.Node
// get cluster details
clusterComputeResource, _, err = getClusterName(ctx, &e2eVSphere)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
/* Get the current leader Csi-Controller-Pod where CSI Provisioner is running and
find the master node IP where this Csi-Controller-Pod is running */
ginkgo.By("Get current leader Csi-Controller-Pod name where CSI Provisioner is running and " +
"find the master node IP where this container leader is running")
controller_name, k8sMasterIP, err := getK8sMasterNodeIPWhereContainerLeaderIsRunning(ctx,
client, sshClientConfig, containerName)
framework.Logf("CSI-Provisioner is running on Leader Pod %s "+
"which is running on master node %s", controller_name, k8sMasterIP)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
// Get allowed topologies for Storage Class
allowedTopologyForSC := getTopologySelector(topologyAffinityDetails, topologyCategories,
topologyLength)
// Create SC with WFC BindingMode
ginkgo.By("Creating Storage Class with WFC Binding Mode and allowed topolgies of 5 levels")
storageclass, err := createStorageClass(client, nil, allowedTopologyForSC, "",
bindingMode, false, "nginx-sc")
gomega.Expect(err).NotTo(gomega.HaveOccurred())
defer func() {
err := client.StorageV1().StorageClasses().Delete(ctx, storageclass.Name, *metav1.NewDeleteOptions(0))
gomega.Expect(err).NotTo(gomega.HaveOccurred())
}()
// Bring down all ESXi hosts that belong to zone2
ginkgo.By("Bring down all ESXi hosts that belong to zone2")
hostsInCluster := getHostsByClusterName(ctx, clusterComputeResource, topologyClusterList[1])
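// Power off every ESXi host in the cluster backing zone2, taking the whole zone
// down while the statefulsets are being created.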
powerOffHostsList = powerOffEsxiHostByCluster(ctx, &e2eVSphere, topologyClusterList[1],
len(hostsInCluster))
defer func() {
ginkgo.By("Bring up all ESXi host which were powered off in zone2")
for i := 0; i < len(powerOffHostsList); i++ {
powerOnEsxiHostByCluster(powerOffHostsList[i])
}
nodes, err := client.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
gomega.Expect(err).NotTo(gomega.HaveOccurred())
for _, node := range nodes.Items {
if !fnodes.IsConditionSetAsExpected(&node, v1.NodeReady, true) {
notReadyNodes = append(notReadyNodes, node)
}
}
if len(notReadyNodes) != 0 {
ginkgo.By("Bring up all K8s nodes which got disconnected due to powered off esxi hosts")
for _, nodeName := range notReadyNodes {
vmUUID := getNodeUUID(ctx, client, nodeName.Name)
gomega.Expect(vmUUID).NotTo(gomega.BeEmpty())
framework.Logf("VM uuid is: %s for node: %s", vmUUID, nodeName.Name)
vmRef, err := e2eVSphere.getVMByUUID(ctx, vmUUID)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
framework.Logf("vmRef: %v for the VM uuid: %s", vmRef, vmUUID)
gomega.Expect(vmRef).NotTo(gomega.BeNil(), "vmRef should not be nil")
vm := object.NewVirtualMachine(e2eVSphere.Client.Client, vmRef.Reference())
_, err = vm.PowerOn(ctx)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
err = vm.WaitForPowerState(ctx, vimtypes.VirtualMachinePowerStatePoweredOn)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
}
}
}()
// Creating StatefulSet service
ginkgo.By("Creating service")
service := CreateService(namespace, client)
defer func() {
deleteService(namespace, client, service)
}()
// Create multiple StatefulSets Specs in parallel
ginkgo.By("Creating multiple StatefulSets specs in parallel")
statefulSets := createParallelStatefulSetSpec(namespace, sts_count, statefulSetReplicaCount)
/* Trigger multiple StatefulSets creation in parallel.
During StatefulSets creation, kill CSI Provisioner container in between */
ginkgo.By("Trigger multiple StatefulSets creation in parallel. During StatefulSets " +
"creation, kill CSI Provisioner container in between ")
var wg sync.WaitGroup
wg.Add(sts_count)
for i := 0; i < len(statefulSets); i++ {
go createParallelStatefulSets(client, namespace, statefulSets[i],
statefulSetReplicaCount, &wg)
if i == 2 {
/* Kill container CSI-Provisioner on the master node where elected leader
is running */
ginkgo.By("Kill container CSI-Provisioner on the master node where elected leader " +
"is running")
err = execDockerPauseNKillOnContainer(sshClientConfig, k8sMasterIP, containerName, k8sVersion)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
}
}
wg.Wait()
// Bring up all ESXi hosts which were powered off in zone2
ginkgo.By("Bring up all ESXi hosts which were powered off in zone2")
for i := 0; i < len(powerOffHostsList); i++ {
powerOnEsxiHostByCluster(powerOffHostsList[i])
}
nodes, err := client.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
gomega.Expect(err).NotTo(gomega.HaveOccurred())
for _, node := range nodes.Items {
if !fnodes.IsConditionSetAsExpected(&node, v1.NodeReady, true) {
notReadyNodes = append(notReadyNodes, node)
}
}
if len(notReadyNodes) != 0 {
ginkgo.By("Bring up all K8s nodes which got disconnected due to powered off esxi hosts")
for _, nodeName := range notReadyNodes {
vmUUID := getNodeUUID(ctx, client, nodeName.Name)
gomega.Expect(vmUUID).NotTo(gomega.BeEmpty())
framework.Logf("VM uuid is: %s for node: %s", vmUUID, nodeName.Name)
vmRef, err := e2eVSphere.getVMByUUID(ctx, vmUUID)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
framework.Logf("vmRef: %v for the VM uuid: %s", vmRef, vmUUID)
gomega.Expect(vmRef).NotTo(gomega.BeNil(), "vmRef should not be nil")
vm := object.NewVirtualMachine(e2eVSphere.Client.Client, vmRef.Reference())
_, err = vm.PowerOn(ctx)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
err = vm.WaitForPowerState(ctx, vimtypes.VirtualMachinePowerStatePoweredOn)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
}
}
ginkgo.By("Wait for k8s cluster to be healthy")
wait4AllK8sNodesToBeUp(ctx, client, nodeList)
err = waitForAllNodes2BeReady(ctx, client, pollTimeout*4)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
/* Get the newly elected leader Csi-Controller-Pod where CSI Provisioner is running and
find the new master node IP where this Csi-Controller-Pod is running */
ginkgo.By("Get newly elected leader Csi-Controller-Pod where CSI Provisioner is running and " +
"find the master node IP where this Csi-Controller-Pod is running")
controller_name, k8sMasterIP, err = getK8sMasterNodeIPWhereContainerLeaderIsRunning(ctx,
client, sshClientConfig, containerName)
framework.Logf("CSI-Provisioner is running on newly elected Leader Pod %s "+
"which is running on master node %s", controller_name, k8sMasterIP)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
// Wait for testbed to be back to normal
time.Sleep(pollTimeoutShort)
ginkgo.By("Waiting for StatefulSets Pods to be in Ready State")
err = waitForStsPodsToBeInReadyRunningState(ctx, client, namespace, statefulSets)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
/* Verify PV node affinity and that the pods are running on appropriate nodes
for each StatefulSet pod */
ginkgo.By("Verify PV node affinity and that the PODS are running on appropriate node")
for i := 0; i < len(statefulSets); i++ {
err = verifyPVnodeAffinityAndPODnodedetailsForStatefulsetsLevel5(ctx, client,
statefulSets[i], namespace, allowedTopologies, true, false)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
}
/* Get the current leader Csi-Controller-Pod where CSI Attacher is running and
find the master node IP where this Csi-Controller-Pod is running */
containerName = "csi-attacher"
ginkgo.By("Get current leader Csi-Controller-Pod name where CSI Attacher is running and " +
"find the master node IP where this Csi-Controller-Pod is running")
controller_name, k8sMasterIP, err = getK8sMasterNodeIPWhereContainerLeaderIsRunning(ctx,
client, sshClientConfig, containerName)
framework.Logf("csi-attacher is running on Leader Pod %s "+
"which is running on master node %s", controller_name, k8sMasterIP)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
// Verify all the workload Pods are in up and running state
ginkgo.By("Verify all the workload Pods are in up and running state")
ssPods = fss.GetPodList(client, statefulSets[1])
for _, pod := range ssPods.Items {
err := fpod.WaitForPodRunningInNamespace(client, &pod)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
}
// Scale down statefulSets replicas count
ginkgo.By("Scaledown StatefulSets replica in parallel. During StatefulSets replica scaledown " +
"kill CSI Attacher container in between")
statefulSetReplicaCount = 5
ginkgo.By("Scale down statefulset replica and verify the replica count")
for i := 0; i < len(statefulSets); i++ {
err = scaleDownStatefulSetPod(ctx, client, statefulSets[i], namespace, statefulSetReplicaCount, true, false)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
ssPodsAfterScaleDown := GetListOfPodsInSts(client, statefulSets[i])
gomega.Expect(len(ssPodsAfterScaleDown.Items) == int(statefulSetReplicaCount)).To(gomega.BeTrue(),
"Number of Pods in the statefulset should match with number of replicas")
if i == 2 {
/* Kill csi-attacher container */
ginkgo.By("Kill csi-attacher container")
err = execDockerPauseNKillOnContainer(sshClientConfig, k8sMasterIP, containerName, k8sVersion)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
}
}
/* Get the newly elected leader Csi-Controller-Pod where CSI Attacher is running and
find the new master node IP where this Csi-Controller-Pod is running */
ginkgo.By("Get newly elected leader Csi-Controller-Pod where CSI Attacher is running and " +
"find the master node IP where this Csi-Controller-Pod is running")
controller_name, k8sMasterIP, err = getK8sMasterNodeIPWhereContainerLeaderIsRunning(ctx,
client, sshClientConfig, containerName)
framework.Logf("csi-attacher is running on elected Leader Pod %s "+
"which is running on master node %s", controller_name, k8sMasterIP)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
for i := 0; i < len(statefulSets); i++ {
// Scale up statefulSets replicas count
statefulSetReplicaCount = 20
ginkgo.By("Scale up statefulset replica and verify the replica count")
err = scaleUpStatefulSetPod(ctx, client, statefulSets[i], namespace, statefulSetReplicaCount,
true, false)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
ssPodsAfterScaleDown := GetListOfPodsInSts(client, statefulSets[i])
gomega.Expect(len(ssPodsAfterScaleDown.Items) == int(statefulSetReplicaCount)).To(gomega.BeTrue(),
"Number of Pods in the statefulset should match with number of replicas")
}
/* Verify PV node affinity and that the pods are running on appropriate nodes
for each StatefulSet pod */
ginkgo.By("Verify PV node affinity and that the PODS are running on appropriate node")
for i := 0; i < len(statefulSets); i++ {
err = verifyPVnodeAffinityAndPODnodedetailsForStatefulsetsLevel5(ctx, client,
statefulSets[i], namespace, allowedTopologies, true, false)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
}
// Scale down statefulSets replica count
statefulSetReplicaCount = 0
ginkgo.By("Scale down statefulset replica count")
for i := 0; i < len(statefulSets); i++ {
err = scaleDownStatefulSetPod(ctx, client, statefulSets[i], namespace, statefulSetReplicaCount, true, false)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
ssPodsAfterScaleDown := GetListOfPodsInSts(client, statefulSets[i])
gomega.Expect(len(ssPodsAfterScaleDown.Items) == int(statefulSetReplicaCount)).To(gomega.BeTrue(),
"Number of Pods in the statefulset should match with number of replicas")
}
defer func() {
pvcs, err := client.CoreV1().PersistentVolumeClaims(namespace).List(ctx, metav1.ListOptions{})
gomega.Expect(err).NotTo(gomega.HaveOccurred())
for _, claim := range pvcs.Items {
pv := getPvFromClaim(client, namespace, claim.Name)
err := fpv.DeletePersistentVolumeClaim(client, claim.Name, namespace)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
ginkgo.By("Verify it's PV and corresponding volumes are deleted from CNS")
err = fpv.WaitForPersistentVolumeDeleted(client, pv.Name, poll,
pollTimeout)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
volumeHandle := pv.Spec.CSI.VolumeHandle
err = e2eVSphere.waitForCNSVolumeToBeDeleted(volumeHandle)
gomega.Expect(err).NotTo(gomega.HaveOccurred(),
fmt.Sprintf("Volume: %s should not be present in the CNS after it is deleted from "+
"kubernetes", volumeHandle))
}
}()
})
})