Skip to content

Commit

Permalink
sidecar: add maintenance mode
Browse files Browse the repository at this point in the history
A Pod in maintenance mode will always fail its readiness probe.
As a result, Kubernetes will remove it from the load balancer.
The Pod will stay alive so that the user can interact with the Scylla process.
  • Loading branch information
zimnx committed Dec 18, 2020
1 parent 012221a commit 2787202
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 2 deletions.
39 changes: 37 additions & 2 deletions pkg/controllers/sidecar/checks.go
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"net/http"

"github.com/scylladb/scylla-operator/pkg/controllers/sidecar/identity"
"github.com/scylladb/scylla-operator/pkg/util/network"

"github.com/pkg/errors"
Expand All @@ -25,18 +26,41 @@ func (mc *MemberReconciler) setupHTTPChecks(ctx context.Context) {
}
}

// nodeUnderMaintenance reports whether the member looked up through
// identity.Retrieve has the naming.NodeMaintenanceLabel key present in its
// ServiceLabels. Only the presence of the key is checked; its value is ignored.
//
// A lookup failure is returned wrapped with the "get member service" message,
// and the boolean result is false in that case.
func nodeUnderMaintenance(ctx context.Context, mc *MemberReconciler) (bool, error) {
	m, err := identity.Retrieve(ctx, mc.member.Name, mc.member.Namespace, mc.kubeClient)
	if err != nil {
		return false, errors.Wrap(err, "get member service")
	}

	if _, labeled := m.ServiceLabels[naming.NodeMaintenanceLabel]; labeled {
		return true, nil
	}
	return false, nil
}

func livenessCheck(mc *MemberReconciler) func(http.ResponseWriter, *http.Request) {
return func(w http.ResponseWriter, req *http.Request) {
ctx := log.WithTraceID(req.Context())

if maintenance, err := nodeUnderMaintenance(ctx, mc); err != nil {
w.WriteHeader(http.StatusServiceUnavailable)
mc.logger.Error(ctx, "Liveness check failed", "error", err)
return
} else if maintenance {
// During maintenance Pod should stay alive.
w.WriteHeader(http.StatusOK)
mc.logger.Info(ctx, "Node under maintenance")
return
}

host, err := network.FindFirstNonLocalIP()
if err != nil {
w.WriteHeader(http.StatusServiceUnavailable)
mc.logger.Error(log.WithTraceID(req.Context()), "Liveness check failed", "error", err)
mc.logger.Error(ctx, "Liveness check failed", "error", err)
return
}
// Check if JMX is reachable
_, err = mc.scyllaClient.Ping(context.Background(), host.String())
if err != nil {
mc.logger.Error(log.WithTraceID(req.Context()), "Liveness check failed", "error", err)
mc.logger.Error(ctx, "Liveness check failed", "error", err)
w.WriteHeader(http.StatusServiceUnavailable)
return
}
Expand All @@ -48,6 +72,17 @@ func readinessCheck(mc *MemberReconciler) func(http.ResponseWriter, *http.Reques
return func(w http.ResponseWriter, req *http.Request) {
ctx := log.WithTraceID(req.Context())

if maintenance, err := nodeUnderMaintenance(ctx, mc); err != nil {
w.WriteHeader(http.StatusServiceUnavailable)
mc.logger.Error(ctx, "Readiness check failed", "error", err)
return
} else if maintenance {
// During maintenance the Pod shouldn't be declared ready.
w.WriteHeader(http.StatusServiceUnavailable)
mc.logger.Info(ctx, "Node under maintenance")
return
}

host, err := network.FindFirstNonLocalIP()
if err != nil {
w.WriteHeader(http.StatusServiceUnavailable)
Expand Down
4 changes: 4 additions & 0 deletions pkg/naming/constants.go
Expand Up @@ -22,6 +22,10 @@ const (
// ReplaceLabel express the intent to replace pod under the specific member.
ReplaceLabel = "scylla/replace"

// NodeMaintenanceLabel means that node is under maintenance.
// Readiness check will always fail when this label is added to member service.
NodeMaintenanceLabel = "scylla/node-maintenance"

LabelValueTrue = "true"
LabelValueFalse = "false"
)
Expand Down

0 comments on commit 2787202

Please sign in to comment.