Skip to content

Commit

Permalink
Implement kured_reboot_required metric
Browse files: browse the repository at this point in the history
  • Loading branch information
awh committed Sep 21, 2017
1 parent 91bf84a commit 6e44cb1
Showing 1 changed file with 60 additions and 23 deletions.
83 changes: 60 additions & 23 deletions cmd/kured/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package main

import (
"math/rand"
"net/http"
"os"
"os/exec"
"regexp"
Expand All @@ -13,14 +14,18 @@ import (
"k8s.io/client-go/pkg/api/v1"
"k8s.io/client-go/rest"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/weaveworks/kured/pkg/alerts"
"github.com/weaveworks/kured/pkg/daemonsetlock"
"github.com/weaveworks/kured/pkg/delaytick"
"github.com/weaveworks/kured/pkg/notifications/slack"
)

var (
version = "unreleased"
version = "unreleased"

// Command line flags
period time.Duration
dsNamespace string
dsName string
Expand All @@ -30,8 +35,19 @@ var (
rebootSentinel string
slackHookURL string
slackUsername string

// Metrics
// rebootRequiredGauge is a gauge vector with a single "node" label. With
// Subsystem "kured" and Name "reboot_required" it is exposed as
// kured_reboot_required. maintainRebootRequiredMetric sets it to 1 while the
// reboot sentinel exists and to 0 otherwise.
rebootRequiredGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Subsystem: "kured",
Name: "reboot_required",
Help: "OS requires reboot due to software updates.",
}, []string{"node"})
)

// init registers rebootRequiredGauge with the default Prometheus registry so
// that it is served by the /metrics handler installed in root. MustRegister
// panics if the registration fails.
func init() {
prometheus.MustRegister(rebootRequiredGauge)
}

func main() {
rootCmd := &cobra.Command{
Use: "kured",
Expand Down Expand Up @@ -63,21 +79,29 @@ func main() {
}
}

func rebootRequired() bool {
func sentinelExists() bool {
_, err := os.Stat(rebootSentinel)
switch {
case err == nil:
log.Infof("Reboot required")
return true
case os.IsNotExist(err):
log.Infof("Reboot not required")
return false
default:
log.Fatalf("Unable to determine if reboot required: %v", err)
log.Fatalf("Unable to determine existence of sentinel: %v", err)
return false // unreachable; prevents compilation error
}
}

// rebootRequired reports whether a reboot is currently required, as indicated
// by sentinelExists, and logs the outcome at info level in either case.
func rebootRequired() bool {
	if sentinelExists() {
		log.Infof("Reboot required")
		return true
	}

	// No else branch: the if above always returns, so this is the
	// "sentinel absent" path.
	log.Infof("Reboot not required")
	return false
}

func rebootBlocked() bool {
if prometheusURL != "" {
alertNames, err := alerts.PrometheusActiveAlerts(prometheusURL, alertFilter)
Expand Down Expand Up @@ -181,7 +205,7 @@ func waitForDrain(client *kubernetes.Clientset, nodeID string) {
}
}

func reboot(nodeID string) {
func commandReboot(nodeID string) {
log.Infof("Commanding reboot")

if slackHookURL != "" {
Expand All @@ -197,9 +221,13 @@ func reboot(nodeID string) {
}
}

func waitForReboot() {
func maintainRebootRequiredMetric(nodeID string) {
for {
log.Infof("Waiting for reboot")
if sentinelExists() {
rebootRequiredGauge.WithLabelValues(nodeID).Set(1)
} else {
rebootRequiredGauge.WithLabelValues(nodeID).Set(0)
}
time.Sleep(time.Minute)
}
}
Expand All @@ -209,18 +237,7 @@ type nodeMeta struct {
Unschedulable bool `json:"unschedulable"`
}

func root(cmd *cobra.Command, args []string) {
log.Infof("Kubernetes Reboot Daemon: %s", version)

nodeID := os.Getenv("KURED_NODE_ID")
if nodeID == "" {
log.Fatal("KURED_NODE_ID environment variable required")
}

log.Infof("Node ID: %s", nodeID)
log.Infof("Lock Annotation: %s/%s:%s", dsNamespace, dsName, lockAnnotation)
log.Infof("Reboot Sentinel: %s every %v", rebootSentinel, period)

func rebootAsRequired(nodeID string) {
config, err := rest.InClusterConfig()
if err != nil {
log.Fatal(err)
Expand Down Expand Up @@ -256,11 +273,31 @@ func root(cmd *cobra.Command, args []string) {
drain(nodeID)
waitForDrain(client, nodeID)
}
reboot(nodeID)
break
commandReboot(nodeID)
for {
log.Infof("Waiting for reboot")
time.Sleep(time.Minute)
}
}
}
}
}

func root(cmd *cobra.Command, args []string) {
log.Infof("Kubernetes Reboot Daemon: %s", version)

nodeID := os.Getenv("KURED_NODE_ID")
if nodeID == "" {
log.Fatal("KURED_NODE_ID environment variable required")
}

log.Infof("Node ID: %s", nodeID)
log.Infof("Lock Annotation: %s/%s:%s", dsNamespace, dsName, lockAnnotation)
log.Infof("Reboot Sentinel: %s every %v", rebootSentinel, period)

go rebootAsRequired(nodeID)
go maintainRebootRequiredMetric(nodeID)

waitForReboot()
http.Handle("/metrics", promhttp.Handler())
log.Fatal(http.ListenAndServe(":8080", nil))
}

0 comments on commit 6e44cb1

Please sign in to comment.