Skip to content

Commit

Permalink
Make sure nodes don't have attached volumes before vm deletion
Browse files Browse the repository at this point in the history
This commit checks if the node has some attached volumes before
deleting its vm. If there are any, it doesn't allow to delete the
vm and returns an error.
  • Loading branch information
Fedosin committed Aug 4, 2021
1 parent edaf826 commit 7a670cd
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 1 deletion.
34 changes: 33 additions & 1 deletion pkg/controller/vsphere/actuator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,11 @@ func TestMachineEvents(t *testing.T) {
Value: providerSpec,
},
},
Status: machinev1.MachineStatus{
NodeRef: &v1.ObjectReference{
Name: "test",
},
},
}

// Create the machine
Expand All @@ -288,10 +293,37 @@ func TestMachineEvents(t *testing.T) {
// Ensure the machine has synced to the cache
getMachine := func() error {
machineKey := types.NamespacedName{Namespace: machine.Namespace, Name: machine.Name}
return k8sClient.Get(ctx, machineKey, machine)
return k8sClient.Get(ctx, machineKey, &machinev1.Machine{})
}
gs.Eventually(getMachine, timeout).Should(Succeed())

node := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "test",
Namespace: "test",
Labels: map[string]string{
machinev1.MachineClusterIDLabel: "CLUSTERID",
},
},
Spec: v1.NodeSpec{},
Status: v1.NodeStatus{
VolumesAttached: []v1.AttachedVolume{},
},
}

// Create the node
gs.Expect(k8sClient.Create(ctx, node)).To(Succeed())
defer func() {
gs.Expect(k8sClient.Delete(ctx, node)).To(Succeed())
}()

// Ensure the node has synced to the cache
getNode := func() error {
nodeKey := types.NamespacedName{Name: node.Name}
return k8sClient.Get(ctx, nodeKey, &v1.Node{})
}
gs.Eventually(getNode, timeout).Should(Succeed())

taskIDCache := make(map[string]string)
params := ActuatorParams{
Client: k8sClient,
Expand Down
29 changes: 29 additions & 0 deletions pkg/controller/vsphere/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ import (
"github.com/vmware/govmomi/vim25/mo"
"github.com/vmware/govmomi/vim25/types"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
apimachinerytypes "k8s.io/apimachinery/pkg/types"
"k8s.io/klog/v2"
)

Expand Down Expand Up @@ -284,6 +286,15 @@ func (r *Reconciler) delete() error {
return fmt.Errorf("%v: Failed to detach virtual disks related to pvs: %w", r.machine.GetName(), err)
}
}

// After node draining, make sure volumes are detached before deleting the Node.
ok, err := r.shouldWaitForNodeVolumes(r.Context, r.machine.Status.NodeRef.Name, r.machine.Name)
if err != nil {
return fmt.Errorf("failed to get if node %v has attached volumes: %w", r.machine.Status.NodeRef.Name, err)
}
if ok {
return fmt.Errorf("node %v has attached volumes, requeuing", r.machine.Status.NodeRef.Name)
}
}

if _, err := vm.powerOffVM(); err != nil {
Expand All @@ -310,6 +321,24 @@ func (r *Reconciler) delete() error {
return fmt.Errorf("destroying vm in progress, reconciling")
}

// shouldWaitForNodeVolumes returns true if node status still have volumes attached
// pod deletion and volume detach happen asynchronously, so pod could be deleted before volume detached from the node
// this could cause issue for some storage provisioner, for example, vsphere-volume this is problematic
// because if the node is deleted before detach success, then the underline VMDK will be deleted together with the Machine
// so after node draining we need to check if all volumes are detached before deleting the node.
func (r *Reconciler) shouldWaitForNodeVolumes(ctx context.Context, nodeName string, machineName string) (bool, error) {
node := &corev1.Node{}
if err := r.apiReader.Get(ctx, apimachinerytypes.NamespacedName{Name: nodeName}, node); err != nil {
if apierrors.IsNotFound(err) {
klog.Errorf("Could not find node from noderef, it may have already been deleted: %w", err)
return false, nil
}
return true, err
}

return len(node.Status.VolumesAttached) != 0, nil
}

// reconcileMachineWithCloudState reconcile machineSpec and status with the latest cloud state
func (r *Reconciler) reconcileMachineWithCloudState(vm *virtualMachine, taskRef string) error {
klog.V(3).Infof("%v: reconciling machine with cloud state", r.machine.GetName())
Expand Down

0 comments on commit 7a670cd

Please sign in to comment.