Skip to content

Commit

Permalink
add metrics (#49)
Browse files Browse the repository at this point in the history
* add metrics

* fix build

* merge conflicts

* Improve metric names to match Prometheus conventions

* Use one histogram with labels for controller operation histogram

* fix histogram labels

* combine health and prom http server

* fix metrics help text

* remove dead flag

* set prom annotations and listen address flag

* rename listen address flag

* fix port
  • Loading branch information
mrIncompetent committed Feb 5, 2018
1 parent bb708b9 commit 10407b0
Show file tree
Hide file tree
Showing 8 changed files with 245 additions and 44 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
key: repo-{{ .Environment.CIRCLE_SHA1 }}
- restore_cache:
key: vendor-{{ checksum "Gopkg.lock" }}
- run: go build -o machine-controller cmd/controller/main.go
- run: go build -o machine-controller github.com/kubermatic/machine-controller/cmd/controller

workflows:
version: 2
Expand Down
17 changes: 15 additions & 2 deletions Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions Gopkg.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,11 @@ required = ["k8s.io/code-generator/cmd/client-gen"]
[[constraint]]
branch = "master"
name = "github.com/heptiolabs/healthcheck"

[[constraint]]
name = "github.com/go-kit/kit"
version = "v0.6.0"

[[constraint]]
name = "github.com/prometheus/client_golang"
version = "v0.8.0"
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ machine-controller: cmd pkg vendor
env CGO_ENABLED=0 go build \
-ldflags '-s -w' \
-o machine-controller \
cmd/controller/main.go
github.com/kubermatic/machine-controller/cmd/controller

docker-image:
docker build -t $(IMAGE_NAME) .
Expand Down
43 changes: 34 additions & 9 deletions cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,28 +34,29 @@ import (
"github.com/kubermatic/machine-controller/pkg/machines"
"github.com/kubermatic/machine-controller/pkg/signals"
"github.com/kubermatic/machine-controller/pkg/ssh"
"github.com/prometheus/client_golang/prometheus/promhttp"
apiextclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
kubeinformers "k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
)

var (
masterURL string
kubeconfig string
sshKeyName string
clusterDNSIPs string
healthListenAddress string
workerCount int
masterURL string
kubeconfig string
sshKeyName string
clusterDNSIPs string
listenAddress string
workerCount int
)

func main() {
flag.StringVar(&kubeconfig, "kubeconfig", "", "Path to a kubeconfig. Only required if out-of-cluster.")
flag.StringVar(&masterURL, "master", "", "The address of the Kubernetes API server. Overrides any value in kubeconfig. Only required if out-of-cluster.")
flag.StringVar(&sshKeyName, "ssh-key-name", "machine-controller", "The name of the private key. This name will be used when a public key will be created at the cloud provider.")
flag.StringVar(&clusterDNSIPs, "cluster-dns", "10.10.10.10", "Comma-separated list of DNS server IP address.")
flag.StringVar(&healthListenAddress, "health-listen-address", "127.0.0.1:8086", "Listen address for the readiness/liveness http server. The endpoints are /live /ready")
flag.IntVar(&workerCount, "worker-count", 5, "Number of workers to process machines. Using a high number with a lot of machines might cause getting rate-limited from your cloud provider.")
flag.StringVar(&listenAddress, "internal-listen-address", "127.0.0.1:8085", "The address on which the http server will listen on. The server exposes metrics on /metrics, liveness check on /live and readiness check on /ready")

flag.Parse()

Expand Down Expand Up @@ -96,7 +97,17 @@ func main() {
glog.Fatalf("failed to get/create ssh key configmap: %v", err)
}

c := controller.NewMachineController(kubeClient, machineClient, kubeInformerFactory, machineInformerFactory, key, ips)
metrics := NewMachineControllerMetrics()
machineMetrics := controller.MetricsCollection{
Machines: metrics.Machines,
Workers: metrics.Workers,
Errors: metrics.Errors,
Nodes: metrics.Nodes,
ControllerOperation: metrics.ControllerOperation,
NodeJoinDuration: metrics.NodeJoinDuration,
}

c := controller.NewMachineController(kubeClient, machineClient, kubeInformerFactory, machineInformerFactory, key, ips, machineMetrics)

go kubeInformerFactory.Start(stopCh)
go machineInformerFactory.Start(stopCh)
Expand All @@ -115,7 +126,7 @@ func main() {
for name, c := range c.ReadinessChecks() {
health.AddReadinessCheck(name, c)
}
go http.ListenAndServe(healthListenAddress, health)
go serveUtilHttpServer(health)

if err = c.Run(workerCount, stopCh); err != nil {
glog.Fatalf("Error running controller: %v", err)
Expand All @@ -134,3 +145,17 @@ func parseClusterDNSIPs(s string) ([]net.IP, error) {
}
return ips, nil
}

// serveUtilHttpServer runs the controller's internal HTTP server on
// listenAddress. It exposes three endpoints:
//
//	/metrics  Prometheus metrics handler
//	/live     liveness endpoint of the given health handler
//	/ready    readiness endpoint of the given health handler
//
// The call blocks while the server is running; whatever ListenAndServe
// returns is reported through glog.Fatalf.
func serveUtilHttpServer(health healthcheck.Handler) {
	mux := http.NewServeMux()
	mux.Handle("/metrics", promhttp.Handler())
	mux.HandleFunc("/live", health.LiveEndpoint)
	mux.HandleFunc("/ready", health.ReadyEndpoint)

	glog.V(4).Infof("serving util http server on %s", listenAddress)

	srv := &http.Server{
		Addr:    listenAddress,
		Handler: mux,
	}
	err := srv.ListenAndServe()
	glog.Fatalf("util http server died: %v", err)
}
71 changes: 71 additions & 0 deletions cmd/controller/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package main

import (
"github.com/go-kit/kit/metrics"
"github.com/go-kit/kit/metrics/prometheus"
prom "github.com/prometheus/client_golang/prometheus"
)

// MachineControllerMetrics bundles every metric reported by the
// machine controller, typed through the go-kit metrics interfaces.
type MachineControllerMetrics struct {
	// Gauges: current counts of machines, nodes and running workers.
	Machines, Nodes, Workers metrics.Gauge

	// Errors counts the unexpected errors the controller encountered.
	Errors metrics.Counter

	// Histograms: duration of controller operations and of node joins.
	ControllerOperation, NodeJoinDuration metrics.Histogram
}

// NewMachineControllerMetrics creates a MachineControllerMetrics whose
// members are backed by go-kit's Prometheus adapters.
//
// Every metric is created with the namespace "machine" and the subsystem
// "controller". Only ControllerOperation carries a label ("operation");
// all other metrics are unlabelled.
//
// The three gauges are explicitly set to 0 before returning so that they
// show up on the metrics endpoint even before the controller has reported
// a value for them.
func NewMachineControllerMetrics() *MachineControllerMetrics {
	namespace := "machine"
	subsystem := "controller"

	cm := &MachineControllerMetrics{
		Machines: prometheus.NewGaugeFrom(prom.GaugeOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "machines",
			Help:      "The number of machines",
		}, []string{}),
		Workers: prometheus.NewGaugeFrom(prom.GaugeOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "workers",
			Help:      "The number of running machine controller workers",
		}, []string{}),
		Nodes: prometheus.NewGaugeFrom(prom.GaugeOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "nodes",
			Help:      "The number of nodes created by a machine",
		}, []string{}),
		Errors: prometheus.NewCounterFrom(prom.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "errors_total",
			// Help text fixed: previously read "number or unexpected errors".
			Help: "The total number of unexpected errors the controller encountered",
		}, []string{}),
		ControllerOperation: prometheus.NewHistogramFrom(prom.HistogramOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "controller_operation_duration_seconds",
			Help:      "The duration it takes to execute an operation",
		}, []string{"operation"}),
		NodeJoinDuration: prometheus.NewHistogramFrom(prom.HistogramOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "node_join_duration_seconds",
			// Help text fixed: previously read "from ... and ..." instead of "from ... to ...".
			Help: "The time it takes from the creation of the machine resource to the final creation of the node resource",
		}, []string{}),
	}

	// Set default values, so that these metrics always show up.
	cm.Machines.Set(0)
	cm.Workers.Set(0)
	cm.Nodes.Set(0)

	return cm
}
6 changes: 5 additions & 1 deletion examples/machine-controller.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ spec:
app: machine-controller
template:
metadata:
annotations:
# Annotation values must be strings: unquoted true / 8085 are parsed by YAML
# as a bool and an int, which the Kubernetes API rejects.
prometheus.io/scrape: "true"
prometheus.io/port: "8085"
prometheus.io/path: /metrics
labels:
app: machine-controller
spec:
Expand All @@ -48,7 +52,7 @@ spec:
- -logtostderr
- -v=8
- -cluster-dns=10.10.10.10
- -health-listen-address=0.0.0.0:8086
- -internal-listen-address=0.0.0.0:8085
livenessProbe:
httpGet:
path: /live
Expand Down

0 comments on commit 10407b0

Please sign in to comment.