Skip to content

Commit

Permalink
taint node with PreferNoSchedule to avoid double draining of pods
Browse files Browse the repository at this point in the history
  • Loading branch information
damoon committed Nov 28, 2020
1 parent 2bdc7eb commit 31ed67c
Showing 1 changed file with 57 additions and 0 deletions.
57 changes: 57 additions & 0 deletions cmd/kured/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ var (
prometheusURL string
alertFilter *regexp.Regexp
rebootSentinel string
preferNoScheduleTaint string
slackHookURL string
slackUsername string
slackChannel string
Expand Down Expand Up @@ -85,6 +86,8 @@ func main() {
"alert names to ignore when checking for active alerts")
rootCmd.PersistentFlags().StringVar(&rebootSentinel, "reboot-sentinel", "/var/run/reboot-required",
"path to file whose existence signals need to reboot")
rootCmd.PersistentFlags().StringVar(&preferNoScheduleTaint, "prefer-no-schedule-taint", "weave.works/kured-node-taint",
"taint to avoid double drained pods")

rootCmd.PersistentFlags().StringVar(&slackHookURL, "slack-hook-url", "",
"slack hook URL for reboot notfications")
Expand Down Expand Up @@ -279,6 +282,54 @@ func uncordon(client *kubernetes.Clientset, node *v1.Node) {
}
}

// preferNoSchedule adds or removes the taint keyed by preferNoScheduleTaint on
// the node named nodeID so that it matches rebootDesired.
//
// When rebootDesired is true the taint (value "reboot", effect
// PreferNoSchedule) is appended unless a taint with that key is already
// present; when it is false every taint carrying that key is dropped. If the
// node is already in the requested state nothing is written and only a debug
// message is logged.
//
// Failures to read or update the node are reported through log.Fatalf; the
// function itself returns nothing.
func preferNoSchedule(client *kubernetes.Clientset, nodeID string, rebootDesired bool) {
	nodes := client.CoreV1().Nodes()

	node, err := nodes.Get(context.TODO(), nodeID, metav1.GetOptions{})
	if err != nil || node == nil {
		log.Fatalf("Error reading node %s: %v", nodeID, err)
	}

	// Presence is decided by key alone; value and effect are not compared.
	tainted := false
	for i := range node.Spec.Taints {
		if node.Spec.Taints[i].Key == preferNoScheduleTaint {
			tainted = true
			break
		}
	}

	switch {
	case tainted && rebootDesired:
		log.Debugf("Taint %v exists already for node %v.", preferNoScheduleTaint, nodeID)
		return

	case !tainted && !rebootDesired:
		log.Debugf("Taint %v already missing for node %v.", preferNoScheduleTaint, nodeID)
		return

	case rebootDesired:
		// Taint is absent but wanted: add it.
		marker := v1.Taint{
			Key:    preferNoScheduleTaint,
			Value:  "reboot",
			Effect: v1.TaintEffectPreferNoSchedule,
		}
		node.Spec.Taints = append(node.Spec.Taints, marker)

	default:
		// Taint is present but no longer wanted: keep everything else.
		remaining := []v1.Taint{}
		for _, existing := range node.Spec.Taints {
			if existing.Key == preferNoScheduleTaint {
				continue
			}
			remaining = append(remaining, existing)
		}
		node.Spec.Taints = remaining
	}

	if _, err = nodes.Update(context.TODO(), node, metav1.UpdateOptions{}); err != nil {
		log.Fatalf("Error updating taint for node %s: %v", nodeID, err)
	}

	log.Infof("Successfully updated taint for node %v", nodeID)
}

func commandReboot(nodeID string) {
log.Infof("Commanding reboot for node: %s", nodeID)

Expand Down Expand Up @@ -336,6 +387,10 @@ func rebootAsRequired(nodeID string, window *timewindow.TimeWindow, TTL time.Dur
release(lock)
}

if !rebootRequired() {
preferNoSchedule(client, nodeID, false)
}

source := rand.NewSource(time.Now().UnixNano())
tick := delaytick.New(source, period)
for range tick {
Expand All @@ -357,6 +412,8 @@ func rebootAsRequired(nodeID string, window *timewindow.TimeWindow, TTL time.Dur
}
nodeMeta.Unschedulable = node.Spec.Unschedulable

preferNoSchedule(client, nodeID, true)

if !acquire(lock, &nodeMeta, TTL) {
continue
}
Expand Down

0 comments on commit 31ed67c

Please sign in to comment.