-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
When a cluster is updated with a different image version, this triggers a serialized restart of all the pods. Prior to this commit, no safety checks were performed and rook was hoping for the best outcome. Now, before restarting a daemon, we check that it can be restarted. Once it's restarted, we also check that we can proceed with the rest of the platform. For instance, with monitors we check that they are in quorum, for OSDs we check that PGs are clean, and for MDS we make sure they are active. Fixes: rook#2889 Signed-off-by: Sébastien Han <seb@redhat.com>
- Loading branch information
Showing
13 changed files
with
337 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
/* | ||
Copyright 2019 The Rook Authors. All rights reserved. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package upgrade | ||
|
||
import ( | ||
"encoding/json" | ||
"fmt" | ||
"strings" | ||
|
||
"github.com/coreos/pkg/capnslog" | ||
"github.com/rook/rook/pkg/clusterd" | ||
"github.com/rook/rook/pkg/daemon/ceph/client" | ||
cephver "github.com/rook/rook/pkg/operator/ceph/version" | ||
) | ||
|
||
// CephDaemonsVersions is a structure that can be used to parse the JSON output of the
// 'ceph versions' command. Each field holds the section for one daemon type, keyed by
// the version string reported by the cluster with an integer count as the value.
// Sections that are absent from the output are left as nil maps.
type CephDaemonsVersions struct {
	Mon       map[string]int `json:"mon,omitempty"`
	Mgr       map[string]int `json:"mgr,omitempty"`
	Osd       map[string]int `json:"osd,omitempty"`
	Rgw       map[string]int `json:"rgw,omitempty"`
	Mds       map[string]int `json:"mds,omitempty"`
	RbdMirror map[string]int `json:"rbd-mirror,omitempty"`
	Overall   map[string]int `json:"overall,omitempty"`
}
|
||
var ( | ||
logger = capnslog.NewPackageLogger("github.com/rook/rook", "upgrade") | ||
) | ||
|
||
func getCephMonVersionString(context *clusterd.Context) (string, error) { | ||
output, err := context.Executor.ExecuteCommandWithOutput(false, "", "ceph", "version") | ||
if err != nil { | ||
return "", fmt.Errorf("failed to run ceph tell: %+v", err) | ||
} | ||
logger.Debug(output) | ||
|
||
return output, nil | ||
} | ||
|
||
func getCephMonVersionsString(context *clusterd.Context) (string, error) { | ||
output, err := context.Executor.ExecuteCommandWithOutput(false, "", "ceph", "versions") | ||
if err != nil { | ||
return "", fmt.Errorf("failed to run ceph tell: %+v", err) | ||
} | ||
logger.Debug(output) | ||
|
||
return output, nil | ||
} | ||
|
||
// GetCephMonVersion reports the Ceph version of all the monitors, or at least a majority with quorum | ||
func GetCephMonVersion(context *clusterd.Context) (*cephver.CephVersion, error) { | ||
output, err := getCephMonVersionString(context) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to run ceph tell: %+v", err) | ||
} | ||
logger.Debug(output) | ||
|
||
v, err := cephver.ExtractCephVersion(output) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to extract ceph version. %+v", err) | ||
} | ||
|
||
return v, nil | ||
} | ||
|
||
// GetCephVersions reports the Ceph version of each daemon in the cluster | ||
func GetCephVersions(context *clusterd.Context) (*CephDaemonsVersions, error) { | ||
output, err := getCephMonVersionsString(context) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to run ceph tell: %+v", err) | ||
} | ||
logger.Debug(output) | ||
|
||
var cephVersionsResult CephDaemonsVersions | ||
err = json.Unmarshal([]byte(output), &cephVersionsResult) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to retrieve ceph versions results. %+v", err) | ||
} | ||
|
||
return &cephVersionsResult, nil | ||
} | ||
|
||
// EnableMessenger2 enable the messenger 2 protocol on Nautilus clusters | ||
func EnableMessenger2(context *clusterd.Context, namespace string) error { | ||
args := []string{"mon", "enable-msgr2"} | ||
_, err := client.ExecuteCephCommand(context, namespace, args) | ||
if err != nil { | ||
return fmt.Errorf("failed to enable msgr2 protocol: %+v", err) | ||
} | ||
logger.Infof("successfully enabled msgr2 protocol") | ||
|
||
return nil | ||
} | ||
|
||
// OkToStopOrContinue determines wether it's ok to stop or continue an upgrade | ||
func OkToStopOrContinue(context *clusterd.Context, namespace, deployment, daemon, action string, cephVersion cephver.CephVersion) error { | ||
|
||
// The ok-to-stop command for osd and mds landed on 14.2.1 | ||
// so we return nil if that Ceph version is not satisfied | ||
if !cephVersion.IsAtLeast(cephver.CephVersion{Major: 14, Minor: 2, Extra: 1}) { | ||
if action == "stop" && daemon != "mon" { | ||
return nil | ||
} | ||
} | ||
|
||
if action == "stop" { | ||
err := OkToStopDaemon(context, deployment, daemon) | ||
if err != nil { | ||
return fmt.Errorf("failed to check if %s was ok to %s", deployment, action) | ||
} | ||
} | ||
|
||
if action == "continue" { | ||
// the mon case is handled directly in the deployment where the mon checks for quorum | ||
switch daemon { | ||
case "osd": | ||
err := OkToContinueOSDDaemon(context, namespace) | ||
if err != nil { | ||
return fmt.Errorf("failed to check if %s was ok to %s", deployment, action) | ||
} | ||
case "mds": | ||
err := OkToContinueMDSDaemon(context, namespace, deployment, daemon) | ||
if err != nil { | ||
return fmt.Errorf("failed to check if %s was ok to %s", deployment, action) | ||
} | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// OkToStopDaemon determines whether it's fine to stop a Ceph daemon | ||
func OkToStopDaemon(context *clusterd.Context, deployment, daemon string) error { | ||
deploymentSplit := strings.Split(deployment, "-") | ||
daemonID := deploymentSplit[len(deploymentSplit)-1] | ||
|
||
output, err := context.Executor.ExecuteCommandWithOutput(false, "", "ceph", daemon, "ok-to-stop", daemonID) | ||
if err != nil { | ||
return fmt.Errorf("deployment %s cannot be stopped: %+v", deployment, err) | ||
} | ||
logger.Infof("deployment %s is ok to be updated. %s", deployment, output) | ||
|
||
return nil | ||
} | ||
|
||
// OkToContinueOSDDaemon determines whether it's fine to go to the next osd during an upgrade | ||
// This basically makes sure all the PGs have settled | ||
func OkToContinueOSDDaemon(context *clusterd.Context, namespace string) error { | ||
if err := client.IsClusterClean(context, namespace); err != nil { | ||
return err | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// OkToContinueMDSDaemon determines whether it's fine to go to the next mds during an upgrade | ||
func OkToContinueMDSDaemon(context *clusterd.Context, namespace, deployment, daemon string) error { | ||
return nil | ||
} |
Oops, something went wrong.