Skip to content

Commit

Permalink
Filter out LiveMigrate workloadUpdateMethod on SNO (#1823)
Browse files Browse the repository at this point in the history
Explicitly filter out LiveMigrate
workloadUpdateMethod on SNO clusters.
On SNO cluster Live Migration FG is globally
disabled because we have a single worker node
but still we allow the user (and it's
the default) to pass
workloadUpdateMethod=["LiveMigrate"]
and this causes an infinite loop in
the upgrade handler in virt-controller
that currently ignores the LiveMigration FG.
Let's explicitly filter out LiveMigrate
workloadUpdateMethod to prevent the issue.

Bug-url: https://bugzilla.redhat.com/show_bug.cgi?id=2065308

Signed-off-by: Simone Tiraboschi <stirabos@redhat.com>
  • Loading branch information
tiraboschi committed Mar 21, 2022
1 parent 6524a75 commit f7d4d37
Show file tree
Hide file tree
Showing 9 changed files with 103 additions and 7 deletions.
1 change: 1 addition & 0 deletions api/v1beta1/hyperconverged_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,7 @@ type HyperConvergedWorkloadUpdateStrategy struct {
// precedence over more disruptive methods. For example if both LiveMigrate and Evict
// methods are listed, only VMs which are not live migratable will be restarted/shutdown.
// An empty list defaults to no automated workload updating.
// LiveMigrate is ignored on SNO clusters.
//
// +listType=atomic
// +kubebuilder:default={"LiveMigrate"}
Expand Down
3 changes: 2 additions & 1 deletion config/crd/bases/hco.kubevirt.io_hyperconvergeds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2148,7 +2148,8 @@ spec:
takes precedence over more disruptive methods. For example if
both LiveMigrate and Evict methods are listed, only VMs which
are not live migratable will be restarted/shutdown. An empty
list defaults to no automated workload updating.
list defaults to no automated workload updating. LiveMigrate
is ignored on SNO clusters.
items:
type: string
type: array
Expand Down
22 changes: 20 additions & 2 deletions controllers/operands/kubevirt.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,9 +280,11 @@ func hcWorkloadUpdateStrategyToKv(hcObject *hcov1beta1.HyperConvergedWorkloadUpd
*kvObject.BatchEvictionSize = *hcObject.BatchEvictionSize
}

if size := len(hcObject.WorkloadUpdateMethods); size > 0 {
filteredWUMethods := filterWorkloadupdatemethods(hcObject.WorkloadUpdateMethods)

if size := len(filteredWUMethods); size > 0 {
kvObject.WorkloadUpdateMethods = make([]kubevirtcorev1.WorkloadUpdateMethod, size)
for i, updateMethod := range hcObject.WorkloadUpdateMethods {
for i, updateMethod := range filteredWUMethods {
kvObject.WorkloadUpdateMethods[i] = kubevirtcorev1.WorkloadUpdateMethod(updateMethod)
}
}
Expand All @@ -291,6 +293,22 @@ func hcWorkloadUpdateStrategyToKv(hcObject *hcov1beta1.HyperConvergedWorkloadUpd
return kvObject
}

func filterWorkloadupdatemethods(workloadUpdateMethods []string) []string {
var filteredWUMethods []string

if hcoutil.GetClusterInfo().IsInfrastructureHighlyAvailable() {
return workloadUpdateMethods
} else {
for _, method := range workloadUpdateMethods {
if method != string(kubevirtcorev1.WorkloadUpdateMethodLiveMigrate) {
filteredWUMethods = append(filteredWUMethods, method)
}
}
}

return filteredWUMethods
}

func getKVConfig(hc *hcov1beta1.HyperConverged) (*kubevirtcorev1.KubeVirtConfiguration, error) {
devConfig, err := getKVDevConfig(hc)
if err != nil {
Expand Down
72 changes: 72 additions & 0 deletions controllers/operands/kubevirt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2062,6 +2062,17 @@ Version: 1.2.3`)

Context("Workload Update Strategy", func() {
defaultBatchEvictionSize := 10
getClusterInfo := hcoutil.GetClusterInfo

BeforeEach(func() {
hcoutil.GetClusterInfo = func() hcoutil.ClusterInfo {
return &commonTestUtils.ClusterInfoMock{}
}
})

AfterEach(func() {
hcoutil.GetClusterInfo = getClusterInfo
})

It("should add Workload Update Strategy if missing in KV", func() {
existingResource, err := NewKubeVirt(hco)
Expand Down Expand Up @@ -2220,6 +2231,67 @@ Version: 1.2.3`)
Expect(*foundUpdateStrategy.BatchEvictionSize).Should(Equal(hcoModifiedBatchEvictionSize))
Expect(foundUpdateStrategy.BatchEvictionInterval.Duration.String()).Should(Equal("5m0s"))
})

DescribeTable("Should ignore LiveMigrate Workload Update Strategy on SNO",
func(hcoWorkloadUpdateMethods []string, expectedKVWorkloadUpdateMethods []kubevirtcorev1.WorkloadUpdateMethod) {

hcoutil.GetClusterInfo = func() hcoutil.ClusterInfo {
return &commonTestUtils.ClusterInfoSNOMock{}
}

existingKv, err := NewKubeVirt(hco)
Expect(err).ToNot(HaveOccurred())

hcoModifiedBatchEvictionSize := 5
hco.Spec.WorkloadUpdateStrategy.WorkloadUpdateMethods = hcoWorkloadUpdateMethods
hco.Spec.WorkloadUpdateStrategy.BatchEvictionInterval = &metav1.Duration{Duration: time.Minute * 5}
hco.Spec.WorkloadUpdateStrategy.BatchEvictionSize = &hcoModifiedBatchEvictionSize

cl := commonTestUtils.InitClient([]runtime.Object{hco, existingKv})
handler := (*genericOperand)(newKubevirtHandler(cl, commonTestUtils.GetScheme()))
res := handler.ensure(req)
Expect(res.UpgradeDone).To(BeFalse())
Expect(res.Updated).To(BeTrue())
Expect(res.Err).To(BeNil())

foundKv := &kubevirtcorev1.KubeVirt{}
Expect(
cl.Get(context.TODO(),
types.NamespacedName{Name: existingKv.Name, Namespace: existingKv.Namespace},
foundKv),
).To(BeNil())

Expect(foundKv.Spec.WorkloadUpdateStrategy.WorkloadUpdateMethods).Should(HaveLen(len(expectedKVWorkloadUpdateMethods)))
for _, expected := range expectedKVWorkloadUpdateMethods {
Expect(foundKv.Spec.WorkloadUpdateStrategy.WorkloadUpdateMethods).Should(ContainElements(expected))
}
Expect(foundKv.Spec.WorkloadUpdateStrategy.WorkloadUpdateMethods).ShouldNot(ContainElements(kubevirtcorev1.WorkloadUpdateMethod("LiveMigrate")))
},
Entry("LiveMigrate and others, LiveMigrate first",
[]string{"LiveMigrate", "test1", "test2"},
[]kubevirtcorev1.WorkloadUpdateMethod{"test1", "test2"},
),
Entry("LiveMigrate and others, LiveMigrate in the middle",
[]string{"test1", "LiveMigrate", "test2"},
[]kubevirtcorev1.WorkloadUpdateMethod{"test1", "test2"},
),
Entry("LiveMigrate and others, LiveMigrate last",
[]string{"test1", "test2", "LiveMigrate"},
[]kubevirtcorev1.WorkloadUpdateMethod{"test1", "test2"},
),
Entry("LiveMigrate only",
[]string{"LiveMigrate"},
[]kubevirtcorev1.WorkloadUpdateMethod{},
),
Entry("empty",
[]string{},
[]kubevirtcorev1.WorkloadUpdateMethod{},
),
Entry("LiveMigrate and Evict",
[]string{"LiveMigrate", "Evict"},
[]kubevirtcorev1.WorkloadUpdateMethod{"Evict"},
))

})

Context("SNO replicas", func() {
Expand Down
3 changes: 2 additions & 1 deletion deploy/crds/hco00.crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2148,7 +2148,8 @@ spec:
takes precedence over more disruptive methods. For example if
both LiveMigrate and Evict methods are listed, only VMs which
are not live migratable will be restarted/shutdown. An empty
list defaults to no automated workload updating.
list defaults to no automated workload updating. LiveMigrate
is ignored on SNO clusters.
items:
type: string
type: array
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2148,7 +2148,8 @@ spec:
takes precedence over more disruptive methods. For example if
both LiveMigrate and Evict methods are listed, only VMs which
are not live migratable will be restarted/shutdown. An empty
list defaults to no automated workload updating.
list defaults to no automated workload updating. LiveMigrate
is ignored on SNO clusters.
items:
type: string
type: array
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2148,7 +2148,8 @@ spec:
takes precedence over more disruptive methods. For example if
both LiveMigrate and Evict methods are listed, only VMs which
are not live migratable will be restarted/shutdown. An empty
list defaults to no automated workload updating.
list defaults to no automated workload updating. LiveMigrate
is ignored on SNO clusters.
items:
type: string
type: array
Expand Down
2 changes: 1 addition & 1 deletion docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ HyperConvergedWorkloadUpdateStrategy defines options related to updating a KubeV

| Field | Description | Scheme | Default | Required |
| ----- | ----------- | ------ | -------- |-------- |
| workloadUpdateMethods | WorkloadUpdateMethods defines the methods that can be used to disrupt workloads during automated workload updates. When multiple methods are present, the least disruptive method takes precedence over more disruptive methods. For example if both LiveMigrate and Evict methods are listed, only VMs which are not live migratable will be restarted/shutdown. An empty list defaults to no automated workload updating. | []string | {"LiveMigrate"} | false |
| workloadUpdateMethods | WorkloadUpdateMethods defines the methods that can be used to disrupt workloads during automated workload updates. When multiple methods are present, the least disruptive method takes precedence over more disruptive methods. For example if both LiveMigrate and Evict methods are listed, only VMs which are not live migratable will be restarted/shutdown. An empty list defaults to no automated workload updating. LiveMigrate is ignored on SNO clusters. | []string | {"LiveMigrate"} | false |
| batchEvictionSize | BatchEvictionSize Represents the number of VMIs that can be forced updated per the BatchShutdownInteral interval | *int | 10 | false |
| batchEvictionInterval | BatchEvictionInterval Represents the interval to wait before issuing the next batch of shutdowns | *metav1.Duration | "1m0s" | false |

Expand Down
1 change: 1 addition & 0 deletions docs/cluster-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,7 @@ The `workloadUpdateStrategy` fields are:
An empty list defaults to no automated workload updating.

The default values is `LiveMigrate`; `Evict` is not enabled by default being potentially disruptive for the existing workloads.
* `LiveMigrate` is ignored on SNO clusters where LiveMigrate is not supported.

### workloadUpdateStrategy example
```yaml
Expand Down

0 comments on commit f7d4d37

Please sign in to comment.