Skip to content

Commit

Permalink
Remove pre-drain hook in state.propagate when a machine is disabled
Browse files Browse the repository at this point in the history
  • Loading branch information
Nuckal777 committed Apr 2, 2024
1 parent 456a8ea commit bd71469
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 19 deletions.
2 changes: 2 additions & 0 deletions constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ package constants
const (
ApproveDeletionLabelKey string = "runtime-extension-maintenance-controller.cloud.sap/approve-deletion"
ApproveDeletionLabelValue string = "true"
EnabledLabelKey string = "runtime-extension-maintenance-controller.cloud.sap/enabled"
EnabledLabelValue string = "true"
PreDrainDeleteHookAnnotationKey string = "pre-drain.delete.hook.machine.cluster.x-k8s.io/maintenance-controller"
PreDrainDeleteHookAnnotationValue string = "runtime-extension-maintenance-controller"
)
13 changes: 4 additions & 9 deletions management/machine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,6 @@ import (
"sigs.k8s.io/controller-runtime/pkg/controller"
)

const (
MaintenanceControllerLabelKey string = "runtime-extension-maintenance-controller.cloud.sap/enabled"
MaintenanceControllerLabelValue string = "true"
)

type MachineReconciler struct {
client.Client
Log logr.Logger
Expand All @@ -60,12 +55,12 @@ func (r *MachineReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
return ctrl.Result{}, err
}
clusterName := machine.Spec.ClusterName
isMaintenanceController, ok := machine.Labels[MaintenanceControllerLabelKey]
enabledValue, ok := machine.Labels[constants.EnabledLabelKey]
// It should be safe to only perform cleanup in the machine controller, when the following occurs:
// - The last relevant machine object is deleted, but cluster-api cleanup is blocked by pre-drain hook
// - cloud.sap/maintenance-controller label is removed on the node
// - This removes the pre-drain hook => causing reconciliation and cleanup
if !ok || isMaintenanceController != MaintenanceControllerLabelValue {
if !ok || enabledValue != constants.EnabledLabelValue {
return ctrl.Result{}, r.cleanupMachine(ctx, &machine, clusterName)
}
_, ok = r.WorkloadNodeControllers[clusterName]
Expand Down Expand Up @@ -110,13 +105,13 @@ func (r *MachineReconciler) cleanupMachine(ctx context.Context, machine *cluster
// cleanup pre-drain hook
delete(machine.Annotations, constants.PreDrainDeleteHookAnnotationKey)
if err := r.Client.Patch(ctx, machine, client.MergeFrom(original)); err != nil {
return fmt.Errorf("failed to remove pre-drain hook for machine %s", machine.Name)
return fmt.Errorf("failed to remove pre-drain hook for machine %s: %w", machine.Name, err)
}
r.Log.Info("removed pre-drain hook", "machine", machine.Name, "cluster", cluster)
// cleanup workload node reconciler, if no machine uses maintenance-controller
selector := client.MatchingLabels{
clusterv1beta1.ClusterNameLabel: cluster,
MaintenanceControllerLabelKey: MaintenanceControllerLabelValue,
constants.EnabledLabelKey: constants.EnabledLabelValue,
}
var machineList clusterv1beta1.MachineList
if err := r.Client.List(ctx, &machineList, selector); err != nil {
Expand Down
16 changes: 13 additions & 3 deletions management/machine_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/sapcc/runtime-extension-maintenance-controller/constants"
"github.com/sapcc/runtime-extension-maintenance-controller/management"
"github.com/sapcc/runtime-extension-maintenance-controller/state"

corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -54,8 +53,8 @@ var _ = Describe("The MachineReconciler", func() {
machine.Name = machineName
machine.Namespace = metav1.NamespaceDefault
machine.Labels = map[string]string{
clusterv1beta1.ClusterNameLabel: "management",
management.MaintenanceControllerLabelKey: management.MaintenanceControllerLabelValue,
clusterv1beta1.ClusterNameLabel: "management",
constants.EnabledLabelKey: constants.EnabledLabelValue,
}
machine.Spec.ClusterName = "management"
Expect(managementClient.Create(context.Background(), machine)).To(Succeed())
Expand Down Expand Up @@ -146,6 +145,17 @@ var _ = Describe("The MachineReconciler", func() {
}).ShouldNot(HaveKey(constants.PreDrainDeleteHookAnnotationKey))
})

It("removes the pre-drain hook when the controller is disabled", func() {
originalMachine := machine.DeepCopy()
delete(machine.Labels, constants.EnabledLabelKey)
Expect(managementClient.Patch(context.Background(), machine, client.MergeFrom(originalMachine))).To(Succeed())
Eventually(func(g Gomega) map[string]string {
var result clusterv1beta1.Machine
g.Expect(managementClient.Get(context.Background(), client.ObjectKeyFromObject(machine), &result)).To(Succeed())
return result.Annotations
}).ShouldNot(HaveKey(constants.PreDrainDeleteHookAnnotationKey))
})

})

})
28 changes: 21 additions & 7 deletions state/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,26 +42,22 @@ func propagate(log logr.Logger, machine *clusterv1beta1.Machine, node *corev1.No
log.Info("machine has no nodeRef")
return
}
_, hasMaintenanceState := node.Labels[MaintenanceStateLabelKey]
if machine.Annotations == nil {
machine.Annotations = make(map[string]string)
}
if node.Labels == nil {
node.Labels = make(map[string]string)
}
approved, ok := node.Labels[constants.ApproveDeletionLabelKey]
// Add deletion hook to machines that have a noderef and the cloud.sap/maintenance-state label
// Remove deletion hooks from machines whose nodes don't have the cloud.sap/maintenance-state label
if hasMaintenanceState && (!ok || approved != constants.ApproveDeletionLabelValue) {

if needsPreDrainHook(machine, node) {
machine.Annotations[constants.PreDrainDeleteHookAnnotationKey] = constants.PreDrainDeleteHookAnnotationValue
log.Info("queueing pre-drain hook attachment")
} else {
delete(machine.Annotations, constants.PreDrainDeleteHookAnnotationKey)
log.Info("queueing pre-drain hook removal")
}

// For to be deleted machines with hook deliver label onto the node (deletion timestamp)
if hasMaintenanceState && machine.DeletionTimestamp != nil {
if needsDeletionLabel(machine, node) {
node.Labels[MachineDeletedLabelKey] = MachineDeletedLabelValue
log.Info("queueing machine deletion label attachment")
} else {
Expand All @@ -70,6 +66,24 @@ func propagate(log logr.Logger, machine *clusterv1beta1.Machine, node *corev1.No
}
}

// Add pre-drain hook to machines that have a noderef and the cloud.sap/maintenance-state label.
func needsPreDrainHook(machine *clusterv1beta1.Machine, node *corev1.Node) bool {
_, hasMaintenanceState := node.Labels[MaintenanceStateLabelKey]
approved, ok := node.Labels[constants.ApproveDeletionLabelKey]
isApproved := ok && approved == constants.ApproveDeletionLabelValue
enabledValue, ok := machine.Labels[constants.EnabledLabelKey]
isEnabled := ok && enabledValue == constants.EnabledLabelValue
return hasMaintenanceState && isEnabled && !isApproved
}

// For to be deleted machines with hook deliver label onto the node (deletion timestamp).
func needsDeletionLabel(machine *clusterv1beta1.Machine, node *corev1.Node) bool {
_, hasMaintenanceState := node.Labels[MaintenanceStateLabelKey]
enabledValue, ok := machine.Labels[constants.EnabledLabelKey]
isEnabled := ok && enabledValue == constants.EnabledLabelValue
return hasMaintenanceState && isEnabled && machine.DeletionTimestamp != nil
}

type Reconciler struct {
Log logr.Logger
ManagementClient client.Client
Expand Down
25 changes: 25 additions & 0 deletions workload/node_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ var _ = Describe("The NodeController", func() {
machine = &clusterv1beta1.Machine{}
machine.Name = machineName
machine.Namespace = metav1.NamespaceDefault
machine.Labels = map[string]string{
constants.EnabledLabelKey: constants.EnabledLabelValue,
}
machine.Spec.ClusterName = "management"
Expect(managementClient.Create(context.Background(), machine)).To(Succeed())
machine.Status.NodeRef = &corev1.ObjectReference{
Expand Down Expand Up @@ -106,4 +109,26 @@ var _ = Describe("The NodeController", func() {
}).ShouldNot(HaveKey(constants.PreDrainDeleteHookAnnotationKey))
})

It("removes the pre-drain hook when the controller is disabled", func() {
originalNode := node.DeepCopy()
node.Labels = map[string]string{state.MaintenanceStateLabelKey: "whatever"}
Expect(workloadClient.Patch(context.Background(), node, client.MergeFrom(originalNode))).To(Succeed())
Eventually(func(g Gomega) map[string]string {
var result clusterv1beta1.Machine
g.Expect(managementClient.Get(context.Background(), client.ObjectKeyFromObject(machine), &result))
return result.Annotations
}).Should(HaveKeyWithValue(constants.PreDrainDeleteHookAnnotationKey, constants.PreDrainDeleteHookAnnotationValue))
originalMachine := machine.DeepCopy()
machine.Labels = map[string]string{}
Expect(managementClient.Patch(context.Background(), machine, client.MergeFrom(originalMachine))).To(Succeed())
originalNode = node.DeepCopy()
node.Labels = map[string]string{state.MaintenanceStateLabelKey: "banana"}
Expect(workloadClient.Patch(context.Background(), node, client.MergeFrom(originalNode))).To(Succeed())
Eventually(func(g Gomega) map[string]string {
var result clusterv1beta1.Machine
g.Expect(managementClient.Get(context.Background(), client.ObjectKeyFromObject(machine), &result))
return result.Annotations
}).ShouldNot(HaveKey(constants.PreDrainDeleteHookAnnotationKey))
})

})

0 comments on commit bd71469

Please sign in to comment.