Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions examples/machine-controller.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,15 @@ rules:
- "list"
- "get"
- "watch"
# volumeAttachments permissions are needed by vsphere clusters
- apiGroups:
- "storage.k8s.io"
resources:
- "volumeattachments"
verbs:
- "list"
- "get"
- "watch"
- apiGroups:
- ""
resources:
Expand Down
141 changes: 94 additions & 47 deletions pkg/controller/machine/machine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import (
"github.com/kubermatic/machine-controller/pkg/containerruntime"
kuberneteshelper "github.com/kubermatic/machine-controller/pkg/kubernetes"
"github.com/kubermatic/machine-controller/pkg/node/eviction"
"github.com/kubermatic/machine-controller/pkg/node/poddeletion"
"github.com/kubermatic/machine-controller/pkg/providerconfig"
providerconfigtypes "github.com/kubermatic/machine-controller/pkg/providerconfig/types"
"github.com/kubermatic/machine-controller/pkg/rhsm"
Expand Down Expand Up @@ -408,7 +409,7 @@ func (r *Reconciler) reconcile(ctx context.Context, machine *clusterv1alpha1.Mac

// step 2: check if a user requested to delete the machine
if machine.DeletionTimestamp != nil {
return r.deleteMachine(ctx, prov, machine)
return r.deleteMachine(ctx, prov, providerConfig.CloudProvider, machine)
}

// Step 3: Essentially creates an instance for the given machine.
Expand Down Expand Up @@ -462,6 +463,30 @@ func (r *Reconciler) ensureMachineHasNodeReadyCondition(machine *clusterv1alpha1
})
}

// shouldCleanupVolumes reports whether machine deletion has to wait for
// VolumeAttachments on the machine's node to be cleaned up first. This is
// only required for vSphere clusters; for every other provider it returns
// false immediately.
//
// It returns false (without error) when the machine has no NodeRef or the
// referenced node no longer exists, since in both cases there can be no
// VolumeAttachments left to collect.
func (r *Reconciler) shouldCleanupVolumes(ctx context.Context, machine *clusterv1alpha1.Machine, providerName providerconfigtypes.CloudProvider) (bool, error) {
	// We need to wait for volumeAttachments cleanup only for vSphere.
	if providerName != providerconfigtypes.CloudProviderVsphere {
		return false, nil
	}

	// No node - no volumeAttachments to be collected.
	if machine.Status.NodeRef == nil {
		klog.V(4).Infof("Skipping volume cleanup for machine %q since it does not have a node", machine.Name)
		return false, nil
	}

	node := &corev1.Node{}
	if err := r.client.Get(ctx, types.NamespacedName{Name: machine.Status.NodeRef.Name}, node); err != nil {
		// Node does not exist - no volumeAttachments to be collected.
		if kerrors.IsNotFound(err) {
			klog.V(4).Infof("Skipping volume cleanup for machine %q since its node does not exist anymore", machine.Name)
			return false, nil
		}
		// Propagate the underlying error so callers can see why the
		// lookup failed (the original message dropped err entirely).
		return false, fmt.Errorf("failed to get node %q: %v", machine.Status.NodeRef.Name, err)
	}
	return true, nil
}

// evictIfNecessary checks if the machine has a node and evicts it if necessary
func (r *Reconciler) shouldEvict(ctx context.Context, machine *clusterv1alpha1.Machine) (bool, error) {
// If the deletion got triggered a few hours ago, skip eviction.
Expand Down Expand Up @@ -521,22 +546,35 @@ func (r *Reconciler) shouldEvict(ctx context.Context, machine *clusterv1alpha1.M
}

// deleteMachine makes sure that an instance has gone in a series of steps.
func (r *Reconciler) deleteMachine(ctx context.Context, prov cloudprovidertypes.Provider, machine *clusterv1alpha1.Machine) (*reconcile.Result, error) {
func (r *Reconciler) deleteMachine(ctx context.Context, prov cloudprovidertypes.Provider, providerName providerconfigtypes.CloudProvider, machine *clusterv1alpha1.Machine) (*reconcile.Result, error) {
shouldEvict, err := r.shouldEvict(ctx, machine)
if err != nil {
return nil, err
}
shouldCleanUpVolumes, err := r.shouldCleanupVolumes(ctx, machine, providerName)
if err != nil {
return nil, err
}

var evictedSomething, deletedSomething bool
var volumesFree = true
if shouldEvict {
evictedSomething, err := eviction.New(ctx, machine.Status.NodeRef.Name, r.client, r.kubeClient).Run()
evictedSomething, err = eviction.New(ctx, machine.Status.NodeRef.Name, r.client, r.kubeClient).Run()
if err != nil {
return nil, fmt.Errorf("failed to evict node %s: %v", machine.Status.NodeRef.Name, err)
}
if evictedSomething {
return &reconcile.Result{RequeueAfter: 10 * time.Second}, nil
}
if shouldCleanUpVolumes {
deletedSomething, volumesFree, err = poddeletion.New(ctx, machine.Status.NodeRef.Name, r.client, r.kubeClient).Run()
if err != nil {
return nil, fmt.Errorf("failed to delete pods bound to volumes running on node %s: %v", machine.Status.NodeRef.Name, err)
}
}

if evictedSomething || deletedSomething || !volumesFree {
return &reconcile.Result{RequeueAfter: 10 * time.Second}, nil
}

if result, err := r.deleteCloudProviderInstance(prov, machine); result != nil || err != nil {
return result, err
}
Expand All @@ -550,7 +588,52 @@ func (r *Reconciler) deleteMachine(ctx context.Context, prov cloudprovidertypes.
return nil, nil
}

return nil, r.deleteNodeForMachine(ctx, machine)
nodes, err := r.retrieveNodesRelatedToMachine(ctx, machine)
if err != nil {
return nil, err
}

return nil, r.deleteNodeForMachine(ctx, nodes, machine)
}

// retrieveNodesRelatedToMachine returns the Node objects belonging to the
// given machine. If the machine has a NodeRef, the single referenced node is
// fetched by name; otherwise nodes are discovered by listing with the
// NodeOwner label selector keyed on the machine UID.
//
// A missing node is not an error: the function logs and returns an empty
// slice so callers can still proceed (e.g. to remove finalizers).
func (r *Reconciler) retrieveNodesRelatedToMachine(ctx context.Context, machine *clusterv1alpha1.Machine) ([]*corev1.Node, error) {
	nodes := make([]*corev1.Node, 0)

	// If there's NodeRef on the Machine object, retrieve the node by using the
	// value of the NodeRef. If there's no NodeRef, try to find the Node by
	// listing nodes using the NodeOwner label selector.
	if machine.Status.NodeRef != nil {
		objKey := ctrlruntimeclient.ObjectKey{Name: machine.Status.NodeRef.Name}
		node := &corev1.Node{}
		if err := r.client.Get(ctx, objKey, node); err != nil {
			if !kerrors.IsNotFound(err) {
				return nil, fmt.Errorf("failed to get node %s: %v", machine.Status.NodeRef.Name, err)
			}
			klog.V(2).Infof("node %q does not longer exist for machine %q", machine.Status.NodeRef.Name, machine.Spec.Name)
		} else {
			nodes = append(nodes, node)
		}
	} else {
		selector, err := labels.Parse(NodeOwnerLabelName + "=" + string(machine.UID))
		if err != nil {
			return nil, fmt.Errorf("failed to parse label selector: %v", err)
		}
		listOpts := &ctrlruntimeclient.ListOptions{LabelSelector: selector}
		nodeList := &corev1.NodeList{}
		if err := r.client.List(ctx, nodeList, listOpts); err != nil {
			return nil, fmt.Errorf("failed to list nodes: %v", err)
		}
		if len(nodeList.Items) == 0 {
			// We just want to log that we didn't find the node; an empty
			// result is intentionally not an error.
			klog.V(3).Infof("No node found for the machine %s", machine.Spec.Name)
		}

		// Take the address of the slice element, NOT of the range variable:
		// `&node` would make every appended pointer alias the single loop
		// variable, so all entries would point at the last item.
		for i := range nodeList.Items {
			nodes = append(nodes, &nodeList.Items[i])
		}
	}

	return nodes, nil
}

func (r *Reconciler) deleteCloudProviderInstance(prov cloudprovidertypes.Provider, machine *clusterv1alpha1.Machine) (*reconcile.Result, error) {
Expand Down Expand Up @@ -623,50 +706,14 @@ func (r *Reconciler) deleteCloudProviderInstance(prov cloudprovidertypes.Provide
})
}

func (r *Reconciler) deleteNodeForMachine(ctx context.Context, machine *clusterv1alpha1.Machine) error {
// If there's NodeRef on the Machine object, remove the Node by using the
// value of the NodeRef. If there's no NodeRef, try to find the Node by
// listing nodes using the NodeOwner label selector.
if machine.Status.NodeRef != nil {
objKey := ctrlruntimeclient.ObjectKey{Name: machine.Status.NodeRef.Name}
node := &corev1.Node{}
nodeFound := true
if err := r.client.Get(ctx, objKey, node); err != nil {
func (r *Reconciler) deleteNodeForMachine(ctx context.Context, nodes []*corev1.Node, machine *clusterv1alpha1.Machine) error {
// iterates on all nodes and delete them. Finally, remove the finalizer on the machine
for _, node := range nodes {
if err := r.client.Delete(ctx, node); err != nil {
if !kerrors.IsNotFound(err) {
return fmt.Errorf("failed to get node %s: %v", machine.Status.NodeRef.Name, err)
}
nodeFound = false
klog.V(2).Infof("node %q does not longer exist for machine %q", machine.Status.NodeRef.Name, machine.Spec.Name)
}

if nodeFound {
if err := r.client.Delete(ctx, node); err != nil {
if !kerrors.IsNotFound(err) {
return err
}
klog.V(2).Infof("node %q does not longer exist for machine %q", machine.Status.NodeRef.Name, machine.Spec.Name)
}
}
} else {
selector, err := labels.Parse(NodeOwnerLabelName + "=" + string(machine.UID))
if err != nil {
return fmt.Errorf("failed to parse label selector: %v", err)
}
listOpts := &ctrlruntimeclient.ListOptions{LabelSelector: selector}
nodes := &corev1.NodeList{}
if err := r.client.List(ctx, nodes, listOpts); err != nil {
return fmt.Errorf("failed to list nodes: %v", err)
}
if len(nodes.Items) == 0 {
// We just want log that we didn't found the node. We don't want to
// return here, as we want to remove finalizers at the end.
klog.V(3).Infof("No node found for the machine %s", machine.Spec.Name)
}

for _, node := range nodes.Items {
if err := r.client.Delete(ctx, &node); err != nil {
return err
}
klog.V(2).Infof("node %q does not longer exist for machine %q", machine.Status.NodeRef.Name, machine.Spec.Name)
}
}

Expand Down
53 changes: 33 additions & 20 deletions pkg/controller/machine/machine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ func TestControllerDeleteNodeForMachine(t *testing.T) {
tests := []struct {
name string
machine *clusterv1alpha1.Machine
nodes []runtime.Object
nodes []*corev1.Node
err error
shouldDeleteNode string
}{
Expand All @@ -489,13 +489,17 @@ func TestControllerDeleteNodeForMachine(t *testing.T) {
NodeRef: &corev1.ObjectReference{Name: "node-1"},
},
},
nodes: []runtime.Object{&corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-0",
}}, &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-1",
}},
nodes: []*corev1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-0",
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-1",
},
},
},
err: nil,
shouldDeleteNode: "node-1",
Expand All @@ -510,16 +514,16 @@ func TestControllerDeleteNodeForMachine(t *testing.T) {
},
Status: clusterv1alpha1.MachineStatus{},
},
nodes: []runtime.Object{
&corev1.Node{
nodes: []*corev1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-0",
Labels: map[string]string{
NodeOwnerLabelName: string(machineUID),
},
},
},
&corev1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-1",
},
Expand All @@ -538,13 +542,13 @@ func TestControllerDeleteNodeForMachine(t *testing.T) {
},
Status: clusterv1alpha1.MachineStatus{},
},
nodes: []runtime.Object{
&corev1.Node{
nodes: []*corev1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-0",
},
},
&corev1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-1",
},
Expand All @@ -564,10 +568,12 @@ func TestControllerDeleteNodeForMachine(t *testing.T) {
NodeRef: &corev1.ObjectReference{Name: "node-1"},
},
},
nodes: []runtime.Object{&corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-0",
}},
nodes: []*corev1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-0",
},
},
},
err: nil,
shouldDeleteNode: "",
Expand All @@ -579,7 +585,9 @@ func TestControllerDeleteNodeForMachine(t *testing.T) {
ctx := context.Background()

objects := []runtime.Object{test.machine}
objects = append(objects, test.nodes...)
for _, n := range test.nodes {
objects = append(objects, n)
}

client := ctrlruntimefake.NewFakeClient(objects...)

Expand All @@ -595,7 +603,12 @@ func TestControllerDeleteNodeForMachine(t *testing.T) {
providerData: providerData,
}

err := reconciler.deleteNodeForMachine(ctx, test.machine)
nodes, err := reconciler.retrieveNodesRelatedToMachine(ctx, test.machine)
if err != nil {
return
}

err = reconciler.deleteNodeForMachine(ctx, nodes, test.machine)
if diff := deep.Equal(err, test.err); diff != nil {
t.Errorf("expected to get %v instead got: %v", test.err, err)
}
Expand Down
Loading