ceph: improve upgrade procedure
When a cluster is updated with a different image version, this triggers
a serialized restart of all the pods. Prior to this commit, no safety
checks were performed and Rook simply hoped for the best outcome.

Now, before restarting a daemon, we check that it can be restarted. Once
it is restarted, we also check that we can proceed with the rest of the
platform. For instance, for monitors we check that they are in quorum,
for OSDs we check that PGs are clean, and for MDSes we make sure they
are all active.

Fixes: rook#2889
Signed-off-by: Sébastien Han <seb@redhat.com>
leseb committed Jun 21, 2019
1 parent 818ad9c commit b28c4af
Showing 13 changed files with 520 additions and 45 deletions.
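
The commit message above describes the new per-daemon flow: verify that the daemon can be stopped, restart it, then verify that the cluster is healthy before moving on to the next one. The minimal Go sketch below illustrates that flow only; the daemon type and the okToStop/restart/okToContinue helpers are invented for this sketch and are not part of this diff.

package main

import "fmt"

type daemon struct {
	kind string // "mon", "osd" or "mds"
	name string
}

// okToStop would ask the cluster whether the daemon can be taken down
// (e.g. the mons stay in quorum without it); stubbed out here.
func okToStop(d daemon) error { return nil }

// restart would delete the pod so it comes back with the new image; stubbed.
func restart(d daemon) error { return nil }

// okToContinue would wait for the post-restart condition: mons back in
// quorum, PGs clean for OSDs, MDS active or standby-replay; stubbed.
func okToContinue(d daemon) error { return nil }

func upgrade(daemons []daemon) error {
	for _, d := range daemons {
		if err := okToStop(d); err != nil {
			return fmt.Errorf("refusing to restart %s %s: %v", d.kind, d.name, err)
		}
		if err := restart(d); err != nil {
			return err
		}
		if err := okToContinue(d); err != nil {
			return fmt.Errorf("cluster not healthy after restarting %s %s: %v", d.kind, d.name, err)
		}
	}
	return nil
}

func main() {
	if err := upgrade([]daemon{{"mon", "a"}, {"osd", "0"}, {"mds", "myfs-a"}}); err != nil {
		fmt.Println("upgrade aborted:", err)
		return
	}
	fmt.Println("all daemons restarted, with health checks in between")
}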
65 changes: 65 additions & 0 deletions pkg/daemon/ceph/client/status.go
@@ -52,6 +52,7 @@ type CephStatus struct {
} `json:"osdmap"`
PgMap PgMap `json:"pgmap"`
MgrMap MgrMap `json:"mgrmap"`
Fsmap Fsmap `json:"fsmap"`
}

type HealthStatus struct {
@@ -123,6 +124,23 @@ type PgStateEntry struct {
Count int `json:"count"`
}

// Fsmap is a struct representing the filesystem map
type Fsmap struct {
Epoch int `json:"epoch"`
ID int `json:"id"`
Up int `json:"up"`
In int `json:"in"`
Max int `json:"max"`
ByRank []struct {
FilesystemID int `json:"filesystem_id"`
Rank int `json:"rank"`
Name string `json:"name"`
Status string `json:"status"`
Gid int `json:"gid"`
} `json:"by_rank"`
UpStandby int `json:"up:standby"`
}
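
The Fsmap struct mirrors the fsmap section of the JSON output of `ceph status`. As a quick illustration (not part of this commit), the snippet below unmarshals a made-up fsmap fragment into a local copy of the struct; all field values are illustrative only.

package main

import (
	"encoding/json"
	"fmt"
)

// Local copy of the Fsmap struct from the diff above, so this snippet is
// self-contained.
type Fsmap struct {
	Epoch  int `json:"epoch"`
	ID     int `json:"id"`
	Up     int `json:"up"`
	In     int `json:"in"`
	Max    int `json:"max"`
	ByRank []struct {
		FilesystemID int    `json:"filesystem_id"`
		Rank         int    `json:"rank"`
		Name         string `json:"name"`
		Status       string `json:"status"`
		Gid          int    `json:"gid"`
	} `json:"by_rank"`
	UpStandby int `json:"up:standby"`
}

func main() {
	// Made-up fsmap fragment; real output comes from the JSON form of `ceph status`.
	raw := `{"epoch":12,"id":1,"up":1,"in":1,"max":1,"up:standby":1,
	         "by_rank":[{"filesystem_id":1,"rank":0,"name":"myfs-a","status":"up:active","gid":14716}]}`

	var fs Fsmap
	if err := json.Unmarshal([]byte(raw), &fs); err != nil {
		panic(err)
	}
	fmt.Printf("rank 0 is %s (%s), standby daemons: %d\n",
		fs.ByRank[0].Name, fs.ByRank[0].Status, fs.UpStandby)
}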

func Status(context *clusterd.Context, clusterName string, debug bool) (CephStatus, error) {
args := []string{"status"}
cmd := NewCephCommand(context, clusterName, args)
@@ -171,3 +189,50 @@ func isClusterClean(status CephStatus) error {

return fmt.Errorf("cluster is not fully clean. PGs: %+v", status.PgMap.PgsByState)
}

// getMDSRank returns the rank of a given MDS
func getMDSRank(status CephStatus, clusterName, mdsName string) (int, error) {
// dummy rank
mdsRank := -1000
for r := range status.Fsmap.ByRank {
if status.Fsmap.ByRank[r].Name == mdsName {
mdsRank = r
}
}
// if the mds is not shown in the map, one reason might be that it is in standby
// if it is not in standby either, then something else is going wrong
if mdsRank < 0 && status.Fsmap.UpStandby < 1 {
// it might seem strange to log an error here since this could also be a warning
// it is a warning until we reach the timeout; this should give the mds enough time to transition its state
// after the timeout we consider that the mds might be gone, or that the timeout was not long enough...
return mdsRank, fmt.Errorf("mds %s not found in fsmap, this likely means mdss are transitioning between active and standby states", mdsName)
}

return mdsRank, nil
}

// MdsActiveOrStandbyReplay returns whether a given MDS is active or in standby
func MdsActiveOrStandbyReplay(context *clusterd.Context, clusterName, mdsName string) error {
status, err := Status(context, clusterName, false)
if err != nil {
return err
}

mdsRank, err := getMDSRank(status, clusterName, mdsName)
if err != nil {
return fmt.Errorf("%+v", err)
}

// this MDS is in standby so let's return immediately
if mdsRank < 0 {
logger.Infof("mds %s is in standby, nothing to check", mdsName)
return nil
}

if status.Fsmap.ByRank[mdsRank].Status == "up:active" || status.Fsmap.ByRank[mdsRank].Status == "up:standby-replay" || status.Fsmap.ByRank[mdsRank].Status == "up:standby" {
logger.Infof("mds %s is %s", mdsName, status.Fsmap.ByRank[mdsRank].Status)
return nil
}

return fmt.Errorf("mds %s is %s, bad state", mdsName, status.Fsmap.ByRank[mdsRank].Status)
}
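
Callers are expected to poll MdsActiveOrStandbyReplay until it succeeds or a timeout expires, as hinted at in the getMDSRank comment above. The sketch below shows one way such a retry loop could look; retryUntilHealthy and the toy check function are hypothetical and not part of this diff.

package main

import (
	"fmt"
	"time"
)

// retryUntilHealthy polls a health check until it succeeds or the timeout
// expires. In Rook, a function such as MdsActiveOrStandbyReplay would play
// the role of `check`; here it is just a parameter so the sketch stays
// self-contained.
func retryUntilHealthy(check func() error, interval, timeout time.Duration) error {
	deadline := time.Now().Add(timeout)
	for {
		err := check()
		if err == nil {
			return nil
		}
		if time.Now().After(deadline) {
			return fmt.Errorf("timed out waiting for daemon to become healthy: %v", err)
		}
		// Until the deadline, a failed check is only a warning: the daemon
		// may simply be transitioning between states (e.g. standby -> active).
		fmt.Printf("not healthy yet, retrying in %s: %v\n", interval, err)
		time.Sleep(interval)
	}
}

func main() {
	attempts := 0
	// Toy check that succeeds on the third attempt.
	check := func() error {
		attempts++
		if attempts < 3 {
			return fmt.Errorf("mds myfs-a is still transitioning")
		}
		return nil
	}
	if err := retryUntilHealthy(check, 100*time.Millisecond, 5*time.Second); err != nil {
		fmt.Println("upgrade check failed:", err)
		return
	}
	fmt.Println("mds healthy, continuing with the next daemon")
}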
14 changes: 14 additions & 0 deletions pkg/daemon/ceph/client/status_test.go

Large diffs are not rendered by default.
