Skip to content

Commit

Permalink
Merge pull request #13768 from Madhu-1/fix-cephfs-fence
Browse files Browse the repository at this point in the history
core: Continue processing PVs for network fencing when no node IPs found
  • Loading branch information
travisn committed Feb 14, 2024
2 parents 1f65d84 + ca21bd9 commit 8ec85dc
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 10 deletions.
35 changes: 26 additions & 9 deletions pkg/operator/ceph/cluster/watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package cluster
import (
"context"
"encoding/json"
stderrors "errors"
"fmt"
"strings"
"time"
Expand Down Expand Up @@ -54,6 +55,7 @@ type clientCluster struct {
var (
// nodesCheckedForReconcile is a package-level set of strings.
// NOTE(review): presumably the names of nodes already examined for reconcile — confirm against callers.
nodesCheckedForReconcile = sets.New[string]()
// networkFenceLabel holds the label key "cephClusterUID".
// NOTE(review): presumably applied to NetworkFence objects to tie them to a cluster — confirm at usage sites.
networkFenceLabel = "cephClusterUID"
// errActiveClientNotFound is the sentinel error returned by fenceRbdImage and
// fenceCephFSVolume when no client IPs are found for a volume. fenceNode checks
// for it with errors.Is and moves on to the next PV instead of failing.
errActiveClientNotFound = stderrors.New("active client not found")
)

// drivers that supports fencing, used in naming networkFence object
Expand Down Expand Up @@ -243,6 +245,10 @@ func (c *clientCluster) fenceNode(ctx context.Context, node *corev1.Node, cluste
if err == nil {
break
}
// continue to fence next rbd volume if active client not found
if stderrors.Is(err, errActiveClientNotFound) {
continue
}

if i == len(rbdPVList)-1 {
return pkgerror.Wrapf(err, "failed to fence rbd volumes")
Expand Down Expand Up @@ -275,6 +281,10 @@ func (c *clientCluster) fenceNode(ctx context.Context, node *corev1.Node, cluste
break
}

// continue to fence next cephFS volume if active client not found
if stderrors.Is(err, errActiveClientNotFound) {
continue
}
if i == len(cephFSPVList)-1 {
return pkgerror.Wrapf(err, "failed to fence cephFS volumes")
}
Expand Down Expand Up @@ -401,11 +411,13 @@ func (c *clientCluster) fenceRbdImage(
if err != nil {
return pkgerror.Wrapf(err, "failed to unmarshal rbd status output")
}
if len(ips) != 0 {
err = c.createNetworkFence(ctx, rbdPV, node, cluster, ips, rbdDriver)
if err != nil {
return pkgerror.Wrapf(err, "failed to create network fence for node %q", node.Name)
}
if len(ips) == 0 {
logger.Infof("no active rbd clients found for rbd volume %q", rbdPV.Name)
return errActiveClientNotFound
}
err = c.createNetworkFence(ctx, rbdPV, node, cluster, ips, rbdDriver)
if err != nil {
return pkgerror.Wrapf(err, "failed to create network fence for node %q", node.Name)
}

return nil
Expand All @@ -419,7 +431,7 @@ func (c *clientCluster) fenceCephFSVolume(

status, err := cephclient.StatusWithUser(c.context, clusterInfo)
if err != nil {
return fmt.Errorf("failed to get ceph status for check active mds. %v", err)
return pkgerror.Wrapf(err, "failed to get ceph status for check active mds")
}

var activeMDS string
Expand All @@ -438,13 +450,18 @@ func (c *clientCluster) fenceCephFSVolume(
return fmt.Errorf("failed to list watchers for cephfs pool/subvoumeName %s/%s. %v", cephFSPV.Spec.CSI.VolumeAttributes["pool"], cephFSPV.Spec.CSI.VolumeAttributes["subvolumeName"], err)
}
ips, err := cephFSMDSClientMarshal(buf, cephFSPV)
if err != nil || ips == nil {
return fmt.Errorf("failed to unmarshal cephfs mds output. %v", err)
if err != nil {
return pkgerror.Wrapf(err, "failed to unmarshal cephfs mds output")
}

if len(ips) == 0 {
logger.Infof("no active mds clients found for cephfs volume %q", cephFSPV.Name)
return errActiveClientNotFound
}

err = c.createNetworkFence(ctx, cephFSPV, node, cluster, ips, cephfsDriver)
if err != nil {
return fmt.Errorf("failed to create network fence for node %q. %v", node.Name, err)
return pkgerror.Wrapf(err, "failed to create network fence for node %q", node.Name)
}

return nil
Expand Down
3 changes: 2 additions & 1 deletion pkg/operator/ceph/cluster/watcher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ func TestHandleNodeFailure(t *testing.T) {
case command == "ceph" && args[0] == "status":
return `{"entity":[{"addr": [{"addr": "10.244.0.12:0", "nonce":3247243972}]}], "client_metadata":{"root":"/"}}`, nil
case command == "ceph" && args[0] == "tell":
return `[{"entity":{"addr":{"addr":"10.244.0.12:0","nonce":3247243972}}, "client_metadata":{"root":"/"}}]`, nil
return `[{"entity":{"addr":{"addr":"10.244.0.12:0","nonce":3247243972}}, "client_metadata":{"root":"/volumes/csi/csi-vol-58469d41-f6c0-4720-b23a-0a0826b842ca"}}]`, nil

}
return "", errors.Errorf("unexpected rbd/ceph command %q", args)
Expand Down Expand Up @@ -250,6 +250,7 @@ func TestHandleNodeFailure(t *testing.T) {
VolumeHandle: "0001-0009-rook-ceph-0000000000000002-24862838-240d-4215-9183-abfc0e9e4001",
VolumeAttributes: map[string]string{
"fsName": "myfs",
"subvolumePath": "/volumes/csi/csi-vol-58469d41-f6c0-4720-b23a-0a0826b842ca",
"subvolumeName": "csi-vol-58469d41-f6c0-4720-b23a-0a0826b842ca",
},
},
Expand Down

0 comments on commit 8ec85dc

Please sign in to comment.