Skip to content

Commit

Permalink
Bug 1990432: Make sure nodes don't have attached volumes before vm de…
Browse files Browse the repository at this point in the history
…letion

This commit checks if the node has some attached volumes before
deleting its vm. If there are any, it doesn't allow to delete the
vm and returns an error.
  • Loading branch information
Fedosin committed Aug 5, 2021
1 parent edaf826 commit e5026ca
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 1 deletion.
34 changes: 33 additions & 1 deletion pkg/controller/vsphere/actuator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,11 @@ func TestMachineEvents(t *testing.T) {
Value: providerSpec,
},
},
Status: machinev1.MachineStatus{
NodeRef: &v1.ObjectReference{
Name: "test",
},
},
}

// Create the machine
Expand All @@ -288,10 +293,37 @@ func TestMachineEvents(t *testing.T) {
// Ensure the machine has synced to the cache
getMachine := func() error {
machineKey := types.NamespacedName{Namespace: machine.Namespace, Name: machine.Name}
return k8sClient.Get(ctx, machineKey, machine)
return k8sClient.Get(ctx, machineKey, &machinev1.Machine{})
}
gs.Eventually(getMachine, timeout).Should(Succeed())

node := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "test",
Namespace: "test",
Labels: map[string]string{
machinev1.MachineClusterIDLabel: "CLUSTERID",
},
},
Spec: v1.NodeSpec{},
Status: v1.NodeStatus{
VolumesAttached: []v1.AttachedVolume{},
},
}

// Create the node
gs.Expect(k8sClient.Create(ctx, node)).To(Succeed())
defer func() {
gs.Expect(k8sClient.Delete(ctx, node)).To(Succeed())
}()

// Ensure the node has synced to the cache
getNode := func() error {
nodeKey := types.NamespacedName{Name: node.Name}
return k8sClient.Get(ctx, nodeKey, &v1.Node{})
}
gs.Eventually(getNode, timeout).Should(Succeed())

taskIDCache := make(map[string]string)
params := ActuatorParams{
Client: k8sClient,
Expand Down
29 changes: 29 additions & 0 deletions pkg/controller/vsphere/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ import (
"github.com/vmware/govmomi/vim25/mo"
"github.com/vmware/govmomi/vim25/types"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
apimachinerytypes "k8s.io/apimachinery/pkg/types"
"k8s.io/klog/v2"
)

Expand Down Expand Up @@ -284,6 +286,15 @@ func (r *Reconciler) delete() error {
return fmt.Errorf("%v: Failed to detach virtual disks related to pvs: %w", r.machine.GetName(), err)
}
}

// After node draining, make sure volumes are detached before deleting the Node.
ok, err := r.nodeHasVolumesAttached(r.Context, r.machine.Status.NodeRef.Name, r.machine.Name)
if err != nil {
return fmt.Errorf("failed to get if node %v has attached volumes: %w", r.machine.Status.NodeRef.Name, err)
}
if ok {
return fmt.Errorf("node %v has attached volumes, requeuing", r.machine.Status.NodeRef.Name)
}
}

if _, err := vm.powerOffVM(); err != nil {
Expand All @@ -310,6 +321,24 @@ func (r *Reconciler) delete() error {
return fmt.Errorf("destroying vm in progress, reconciling")
}

// nodeHasVolumesAttached returns true if node status still have volumes attached
// pod deletion and volume detach happen asynchronously, so pod could be deleted before volume detached from the node
// this could cause issue for some storage provisioner, for example, vsphere-volume this is problematic
// because if the node is deleted before detach success, then the underline VMDK will be deleted together with the Machine
// so after node draining we need to check if all volumes are detached before deleting the node.
func (r *Reconciler) nodeHasVolumesAttached(ctx context.Context, nodeName string, machineName string) (bool, error) {
node := &corev1.Node{}
if err := r.apiReader.Get(ctx, apimachinerytypes.NamespacedName{Name: nodeName}, node); err != nil {
if apierrors.IsNotFound(err) {
klog.Errorf("Could not find node from noderef, it may have already been deleted: %w", err)
return false, nil
}
return true, err
}

return len(node.Status.VolumesAttached) != 0, nil
}

// reconcileMachineWithCloudState reconcile machineSpec and status with the latest cloud state
func (r *Reconciler) reconcileMachineWithCloudState(vm *virtualMachine, taskRef string) error {
klog.V(3).Infof("%v: reconciling machine with cloud state", r.machine.GetName())
Expand Down

0 comments on commit e5026ca

Please sign in to comment.