From bd216615cb8381e3ff1fa10c9b33e5d4af6ebceb Mon Sep 17 00:00:00 2001 From: David Sauer Date: Sat, 28 Nov 2020 22:52:08 +0100 Subject: [PATCH] taint node with PreferNoSchedule to avoid double draining of pods --- cmd/kured/main.go | 44 +++++++++----- pkg/taints/taints.go | 140 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 170 insertions(+), 14 deletions(-) create mode 100644 pkg/taints/taints.go diff --git a/cmd/kured/main.go b/cmd/kured/main.go index a9312884a..04b927bda 100644 --- a/cmd/kured/main.go +++ b/cmd/kured/main.go @@ -24,6 +24,7 @@ import ( "github.com/weaveworks/kured/pkg/daemonsetlock" "github.com/weaveworks/kured/pkg/delaytick" "github.com/weaveworks/kured/pkg/notifications/slack" + "github.com/weaveworks/kured/pkg/taints" "github.com/weaveworks/kured/pkg/timewindow" ) @@ -31,20 +32,21 @@ var ( version = "unreleased" // Command line flags - period time.Duration - dsNamespace string - dsName string - lockAnnotation string - lockTTL time.Duration - prometheusURL string - alertFilter *regexp.Regexp - rebootSentinel string - slackHookURL string - slackUsername string - slackChannel string - messageTemplateDrain string - messageTemplateReboot string - podSelectors []string + period time.Duration + dsNamespace string + dsName string + lockAnnotation string + lockTTL time.Duration + prometheusURL string + alertFilter *regexp.Regexp + rebootSentinel string + preferNoScheduleTaintName string + slackHookURL string + slackUsername string + slackChannel string + messageTemplateDrain string + messageTemplateReboot string + podSelectors []string rebootDays []string rebootStart string @@ -85,6 +87,8 @@ func main() { "alert names to ignore when checking for active alerts") rootCmd.PersistentFlags().StringVar(&rebootSentinel, "reboot-sentinel", "/var/run/reboot-required", "path to file whose existence signals need to reboot") + rootCmd.PersistentFlags().StringVar(&preferNoScheduleTaintName, "prefer-no-schedule-taint", "weave.works/kured-node-reboot", 
+ "taint name used for marking nodes pending reboot (to prevent them from getting a pod when another nodes reboots)") rootCmd.PersistentFlags().StringVar(&slackHookURL, "slack-hook-url", "", "slack hook URL for reboot notfications") @@ -336,10 +340,19 @@ func rebootAsRequired(nodeID string, window *timewindow.TimeWindow, TTL time.Dur release(lock) } + preferNoScheduleTaint := taints.New(client, nodeID, preferNoScheduleTaintName, v1.TaintEffectPreferNoSchedule) + + // Remove taint immediately during startup to quickly allow scheduling again. + if !rebootRequired() { + preferNoScheduleTaint.Disable() + } + source := rand.NewSource(time.Now().UnixNano()) tick := delaytick.New(source, period) for range tick { if !window.Contains(time.Now()) { + // Remove taint outside the reboot time window to allow for normal operation. + preferNoScheduleTaint.Disable() continue } @@ -348,6 +361,8 @@ func rebootAsRequired(nodeID string, window *timewindow.TimeWindow, TTL time.Dur } if rebootBlocked(client, nodeID) { + // Prefer to not schedule pods onto this node to avoid draing the same pod multiple times. 
+ preferNoScheduleTaint.Enable() continue } @@ -392,6 +407,7 @@ func root(cmd *cobra.Command, args []string) { } else { log.Info("Lock TTL not set, lock will remain until being released") } + log.Infof("PreferNoSchedule taint: %s", preferNoScheduleTaintName) log.Infof("Reboot Sentinel: %s every %v", rebootSentinel, period) log.Infof("Blocking Pod Selectors: %v", podSelectors) log.Infof("Reboot on: %v", window) diff --git a/pkg/taints/taints.go b/pkg/taints/taints.go new file mode 100644 index 000000000..c688749eb --- /dev/null +++ b/pkg/taints/taints.go @@ -0,0 +1,140 @@ +package taints + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/prometheus/common/log" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" +) + +// Taint holds the client, node ID, taint name and effect needed to add or remove one taint on a node. +type Taint struct { + client *kubernetes.Clientset + nodeID string + taintName string + effect v1.TaintEffect +} + +// New returns a Taint for the given node, taint name and effect. It only stores its arguments; the node is not changed until Enable or Disable is called. +func New(client *kubernetes.Clientset, nodeID, taintName string, effect v1.TaintEffect) *Taint { + return &Taint{ + client: client, + nodeID: nodeID, + taintName: taintName, + effect: effect, + } +} + +// Enable creates the taint for a node. Creating an existing taint is a noop. +func (t *Taint) Enable() { + preferNoSchedule(t.client, t.nodeID, t.taintName, t.effect, true) +} + +// Disable removes the taint for a node. Removing a missing taint is a noop. 
+func (t *Taint) Disable() {
+	preferNoSchedule(t.client, t.nodeID, t.taintName, t.effect, false)
+}
+
+// preferNoSchedule makes sure the taint identified by taintName and effect is
+// present on node nodeID when taintShouldExists is true and absent otherwise.
+//
+// It reads the node, returns without patching when the node is already in the
+// requested state, and otherwise sends a JSON Patch to the API server. A read,
+// encoding or patch error terminates the process through log.Fatalf.
+//
+// Parameters:
+//   - client: clientset used to read and patch the node.
+//   - nodeID: name of the node to read and patch.
+//   - taintName: key of the taint.
+//   - effect: effect of the taint.
+//   - taintShouldExists: true to add the taint, false to remove it.
+func preferNoSchedule(client *kubernetes.Clientset, nodeID, taintName string, effect v1.TaintEffect, taintShouldExists bool) {
+	updatedNode, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
+	if err != nil || updatedNode == nil {
+		log.Fatalf("Error reading node %s: %v", nodeID, err)
+	}
+
+	// Kubernetes treats key and effect together as a taint's identity, so a
+	// taint with the same key but another effect must not count as a match.
+	// offset stays -1 when the node carries no matching taint.
+	offset := -1
+	for i, taint := range updatedNode.Spec.Taints {
+		if taint.Key == taintName && taint.Effect == effect {
+			offset = i
+			break
+		}
+	}
+	taintExists := offset >= 0
+
+	switch {
+	case taintExists && taintShouldExists:
+		log.Debugf("Taint %v exists already for node %v.", taintName, nodeID)
+		return
+	case !taintExists && !taintShouldExists:
+		log.Debugf("Taint %v already missing for node %v.", taintName, nodeID)
+		return
+	}
+
+	// patchTaints is one JSON Patch operation; Value is left out for "remove".
+	type patchTaints struct {
+		Op    string      `json:"op"`
+		Path  string      `json:"path"`
+		Value interface{} `json:"value,omitempty"`
+	}
+
+	taint := v1.Taint{
+		Key:    taintName,
+		Effect: effect,
+	}
+
+	var patches []patchTaints
+	switch {
+	case taintExists:
+		// Remove the taint. The "test" compares against the entry exactly as it
+		// was read, including any value or timestamp it carries, so the patch
+		// is rejected when the list changed after the Get rather than failing
+		// on fields this function never set or removing a different entry.
+		path := fmt.Sprintf("/spec/taints/%d", offset)
+		patches = []patchTaints{
+			{Op: "test", Path: path, Value: updatedNode.Spec.Taints[offset]},
+			{Op: "remove", Path: path},
+		}
+	case len(updatedNode.Spec.Taints) == 0:
+		// First taint on the node: create the list, guarded by a "test" of the
+		// spec as read so that taints added in the meantime are not replaced.
+		patches = []patchTaints{
+			{Op: "test", Path: "/spec", Value: updatedNode.Spec},
+			{Op: "add", Path: "/spec/taints", Value: []v1.Taint{}},
+			{Op: "add", Path: "/spec/taints/-", Value: taint},
+		}
+	default:
+		// Append the missing taint to the existing list.
+		patches = []patchTaints{
+			{Op: "add", Path: "/spec/taints/-", Value: taint},
+		}
+	}
+
+	patchBytes, err := json.Marshal(patches)
+	if err != nil {
+		log.Fatalf("Error encoding taint patcht for node %s: %v", nodeID, err)
+	}
+
+	// The operations are applied as one JSON Patch: if a "test" fails, the
+	// request returns an error and none of the operations take effect.
+	_, err = client.CoreV1().Nodes().Patch(context.TODO(), nodeID, types.JSONPatchType, patchBytes, metav1.PatchOptions{})
+	if err != nil {
+		log.Fatalf("Error patching taint for node %s: %v", nodeID, err)
+	}
+
+	// Reaching this point means the patch was accepted by the API server.
+	if taintShouldExists {
+		log.Info("Node taint added")
+	} else {
+		log.Info("Node taint removed")
+	}
+}