Skip to content

Commit

Permalink
csi: update the deletion of network fence
Browse files Browse the repository at this point in the history
this commit updates the removal of network fence cr based on
their drivers

Signed-off-by: Riya Singhal <rsinghal@redhat.com>
  • Loading branch information
riya-singhal31 committed Jan 24, 2024
1 parent 93bdcc1 commit 400ea02
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 9 deletions.
66 changes: 58 additions & 8 deletions pkg/operator/ceph/cluster/watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,14 @@ func (c *clientCluster) handleNodeFailure(ctx context.Context, cluster *cephv1.C
return nil
}

err = c.unfenceAndDeleteNetworkFence(ctx, *node, cluster)
err = c.unfenceAndDeleteRbdNetworkFence(ctx, *node, cluster)
if err != nil {
return pkgerror.Wrapf(err, "failed to delete network fence for node %q.", node.Name)
return pkgerror.Wrapf(err, "failed to delete rbd network fence for node %q.", node.Name)
}

err = c.unfenceAndDeleteCephFSNetworkFence(ctx, *node, cluster)
if err != nil {
return pkgerror.Wrapf(err, "failed to delete cephFS network fence for node %q.", node.Name)
}

return nil
Expand Down Expand Up @@ -343,7 +348,7 @@ func listRWOCephFSPV(listPVs *corev1.PersistentVolumeList, cluster *cephv1.CephC
continue
}

if pv.Spec.CSI.VolumeAttributes["staticVolume"] == "true" || pv.Spec.CSI.VolumeAttributes["pool"] == "" {
if pv.Spec.CSI.VolumeAttributes["staticVolume"] == "true" || pv.Spec.CSI.VolumeAttributes["fsName"] == "" {
logger.Debugf("skipping, static pv %q", pv.Name)
continue
}
Expand Down Expand Up @@ -546,9 +551,54 @@ func (c *clientCluster) createNetworkFence(ctx context.Context, pv corev1.Persis
return nil
}

func (c *clientCluster) unfenceAndDeleteNetworkFence(ctx context.Context, node corev1.Node, cluster *cephv1.CephCluster) error {
func (c *clientCluster) unfenceAndDeleteRbdNetworkFence(ctx context.Context, node corev1.Node, cluster *cephv1.CephCluster) error {
networkFence := &addonsv1alpha1.NetworkFence{}
err := c.client.Get(ctx, types.NamespacedName{Name: node.Name + "rbd", Namespace: cluster.Namespace}, networkFence)
if err != nil && !errors.IsNotFound(err) {
return err
} else if errors.IsNotFound(err) {
return nil
}
logger.Infof("node %s does not have taint %s, unfencing networkFence CR", node.Name, corev1.TaintNodeOutOfService)

// Unfencing is required to unblock the node and then delete the network fence CR
networkFence.Spec.FenceState = addonsv1alpha1.Unfenced
err = c.client.Update(ctx, networkFence)
if err != nil {
logger.Errorf("failed to unFence network fence CR. %v", err)
return err
}

err = wait.PollUntilContextTimeout(ctx, 2*time.Second, 60*time.Second, true, func(ctx context.Context) (bool, error) {
err = c.client.Get(ctx, types.NamespacedName{Name: node.Name + "rbd", Namespace: cluster.Namespace}, networkFence)
if err != nil && !errors.IsNotFound(err) {
return false, err
}

if networkFence.Spec.FenceState != addonsv1alpha1.Unfenced {
logger.Infof("waiting for network fence CR %s to get in %s state before deletion", networkFence.Name, addonsv1alpha1.Unfenced)
return false, err
}

logger.Infof("successfully unfenced rbd network fence cr %q, proceeding with deletion", networkFence.Name)

err = c.client.Delete(ctx, networkFence)
if err == nil || errors.IsNotFound(err) {
logger.Infof("successfully deleted network fence CR %s", networkFence.Name)
return true, nil
}
return false, nil
})
if err != nil {
return pkgerror.Wrapf(err, "timeout out deleting the rbd network fence CR %s", networkFence.Name)
}

return nil
}

func (c *clientCluster) unfenceAndDeleteCephFSNetworkFence(ctx context.Context, node corev1.Node, cluster *cephv1.CephCluster) error {
networkFence := &addonsv1alpha1.NetworkFence{}
err := c.client.Get(ctx, types.NamespacedName{Name: node.Name, Namespace: cluster.Namespace}, networkFence)
err := c.client.Get(ctx, types.NamespacedName{Name: node.Name + "cephFS", Namespace: cluster.Namespace}, networkFence)
if err != nil && !errors.IsNotFound(err) {
return err
} else if errors.IsNotFound(err) {
Expand All @@ -565,7 +615,7 @@ func (c *clientCluster) unfenceAndDeleteNetworkFence(ctx context.Context, node c
}

err = wait.PollUntilContextTimeout(ctx, 2*time.Second, 60*time.Second, true, func(ctx context.Context) (bool, error) {
err = c.client.Get(ctx, types.NamespacedName{Name: node.Name, Namespace: cluster.Namespace}, networkFence)
err = c.client.Get(ctx, types.NamespacedName{Name: node.Name + "cephFS", Namespace: cluster.Namespace}, networkFence)
if err != nil && !errors.IsNotFound(err) {
return false, err
}
Expand All @@ -575,7 +625,7 @@ func (c *clientCluster) unfenceAndDeleteNetworkFence(ctx context.Context, node c
return false, err
}

logger.Infof("successfully unfenced network fence cr %q, proceeding with deletion", networkFence.Name)
logger.Infof("successfully unfenced cephFS network fence cr %q, proceeding with deletion", networkFence.Name)

err = c.client.Delete(ctx, networkFence)
if err == nil || errors.IsNotFound(err) {
Expand All @@ -585,7 +635,7 @@ func (c *clientCluster) unfenceAndDeleteNetworkFence(ctx context.Context, node c
return false, nil
})
if err != nil {
return pkgerror.Wrapf(err, "timeout out deleting the network fence CR %s", networkFence.Name)
return pkgerror.Wrapf(err, "timeout out deleting the cephFS network fence CR %s", networkFence.Name)
}

return nil
Expand Down
4 changes: 3 additions & 1 deletion pkg/operator/ceph/cluster/watcher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,10 @@ func TestHandleNodeFailure(t *testing.T) {
switch {
case command == "rbd" && args[0] == "status":
return `{"watchers":[{"address":"192.168.39.137:0/3762982934","client":4307,"cookie":18446462598732840961}]}`, nil
case command == "ceph" && args[0] == "status":
return `{"watchers":[{"address":"192.169.39.137:0/3762982934","client":5201,"cookie":18446462598732840961}]}`, nil
case command == "ceph" && args[0] == "tell":
return `{"watchers":[{"id":5201,"entity":[{"addr": [{"addr": "10.244.0.12:0", "nonce":3247243972}]}]]}`, nil
return `{"watchers":[{"id":5201,"entity":[{"addr": [{"addr": "10.244.0.12:0", "nonce":3247243972}]}]}]}`, nil

}
return "", errors.Errorf("unexpected rbd/ceph command %q", args)
Expand Down

0 comments on commit 400ea02

Please sign in to comment.