Skip to content

Commit

Permalink
Introduce Metrics Options struct & secure metrics serving
Browse files Browse the repository at this point in the history
  • Loading branch information
sbueringer committed Aug 10, 2023
1 parent 5bf44d2 commit e59161e
Show file tree
Hide file tree
Showing 17 changed files with 1,477 additions and 212 deletions.
59 changes: 59 additions & 0 deletions examples/scratch-env/go.sum

Large diffs are not rendered by default.

38 changes: 38 additions & 0 deletions go.mod
Expand Up @@ -22,6 +22,7 @@ require (
k8s.io/api v0.28.0-beta.0
k8s.io/apiextensions-apiserver v0.28.0-beta.0
k8s.io/apimachinery v0.28.0-beta.0
k8s.io/apiserver v0.28.0-beta.0
k8s.io/client-go v0.28.0-beta.0
k8s.io/component-base v0.28.0-beta.0
k8s.io/klog/v2 v2.100.1
Expand All @@ -30,21 +31,34 @@ require (
)

require (
github.com/NYTimes/gziphandler v1.1.1 // indirect
github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df // indirect
github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/coreos/go-semver v0.3.1 // indirect
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/emicklei/go-restful/v3 v3.9.0 // indirect
github.com/felixge/httpsnoop v1.0.3 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-openapi/jsonpointer v0.19.6 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
github.com/go-openapi/swag v0.22.3 // indirect
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/google/cel-go v0.16.0 // indirect
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect
github.com/imdario/mergo v0.3.6 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
Expand All @@ -55,20 +69,44 @@ require (
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/common v0.44.0 // indirect
github.com/prometheus/procfs v0.10.1 // indirect
github.com/spf13/cobra v1.7.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stoewer/go-strcase v1.2.0 // indirect
go.etcd.io/etcd/api/v3 v3.5.9 // indirect
go.etcd.io/etcd/client/pkg/v3 v3.5.9 // indirect
go.etcd.io/etcd/client/v3 v3.5.9 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.35.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.35.1 // indirect
go.opentelemetry.io/otel v1.10.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.10.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.10.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.10.0 // indirect
go.opentelemetry.io/otel/metric v0.31.0 // indirect
go.opentelemetry.io/otel/sdk v1.10.0 // indirect
go.opentelemetry.io/otel/trace v1.10.0 // indirect
go.opentelemetry.io/proto/otlp v0.19.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/crypto v0.11.0 // indirect
golang.org/x/net v0.12.0 // indirect
golang.org/x/oauth2 v0.8.0 // indirect
golang.org/x/sync v0.2.0 // indirect
golang.org/x/term v0.10.0 // indirect
golang.org/x/text v0.11.0 // indirect
golang.org/x/time v0.3.0 // indirect
golang.org/x/tools v0.9.3 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto v0.0.0-20230526161137-0005af68ea54 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20230525234035-dd9d682886f9 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234030-28d5490b6b19 // indirect
google.golang.org/grpc v1.54.0 // indirect
google.golang.org/protobuf v1.30.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/kms v0.28.0-beta.0 // indirect
k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9 // indirect
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.1.2 // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect
)
446 changes: 446 additions & 0 deletions go.sum

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions pkg/builder/builder_suite_test.go
Expand Up @@ -26,12 +26,12 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/rest"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"

"sigs.k8s.io/controller-runtime/pkg/envtest"
"sigs.k8s.io/controller-runtime/pkg/internal/testing/addr"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"sigs.k8s.io/controller-runtime/pkg/metrics"
"sigs.k8s.io/controller-runtime/pkg/webhook"
)

Expand All @@ -57,7 +57,7 @@ var _ = BeforeSuite(func() {
Expect(err).NotTo(HaveOccurred())

// Prevent the metrics listener being created
metrics.DefaultBindAddress = "0"
metricsserver.DefaultBindAddress = "0"

webhook.DefaultPort, _, err = addr.Suggest("")
Expect(err).NotTo(HaveOccurred())
Expand All @@ -67,7 +67,7 @@ var _ = AfterSuite(func() {
Expect(testenv.Stop()).To(Succeed())

// Put the DefaultBindAddress back
metrics.DefaultBindAddress = ":8080"
metricsserver.DefaultBindAddress = ":8080"

// Change the webhook.DefaultPort back to the original default.
webhook.DefaultPort = 9443
Expand Down
6 changes: 3 additions & 3 deletions pkg/controller/controller_suite_test.go
Expand Up @@ -26,12 +26,12 @@ import (
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/rest"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"

"sigs.k8s.io/controller-runtime/pkg/controller/controllertest"
"sigs.k8s.io/controller-runtime/pkg/envtest"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"sigs.k8s.io/controller-runtime/pkg/metrics"
crscheme "sigs.k8s.io/controller-runtime/pkg/scheme"
)

Expand Down Expand Up @@ -79,12 +79,12 @@ var _ = BeforeSuite(func() {
Expect(err).NotTo(HaveOccurred())

// Prevent the metrics listener being created
metrics.DefaultBindAddress = "0"
metricsserver.DefaultBindAddress = "0"
})

var _ = AfterSuite(func() {
Expect(testenv.Stop()).To(Succeed())

// Put the DefaultBindAddress back
metrics.DefaultBindAddress = ":8080"
metricsserver.DefaultBindAddress = ":8080"
})
60 changes: 7 additions & 53 deletions pkg/manager/internal.go
Expand Up @@ -28,7 +28,6 @@ import (
"time"

"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus/promhttp"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/runtime"
kerrors "k8s.io/apimachinery/pkg/util/errors"
Expand All @@ -44,7 +43,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/internal/httpserver"
intrec "sigs.k8s.io/controller-runtime/pkg/internal/recorder"
"sigs.k8s.io/controller-runtime/pkg/metrics"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
"sigs.k8s.io/controller-runtime/pkg/webhook"
)

Expand All @@ -57,7 +56,6 @@ const (

defaultReadinessEndpoint = "/readyz"
defaultLivenessEndpoint = "/healthz"
defaultMetricsEndpoint = "/metrics"
)

var _ Runnable = &controllerManager{}
Expand All @@ -84,11 +82,8 @@ type controllerManager struct {
// on shutdown
leaderElectionReleaseOnCancel bool

// metricsListener is used to serve prometheus metrics
metricsListener net.Listener

// metricsExtraHandlers contains extra handlers to register on http server that serves metrics.
metricsExtraHandlers map[string]http.Handler
// metricsServer is used to serve prometheus metrics
metricsServer metricsserver.Server

// healthProbeListener is used to serve liveness probe
healthProbeListener net.Listener
Expand Down Expand Up @@ -184,28 +179,6 @@ func (cm *controllerManager) add(r Runnable) error {
return cm.runnables.Add(r)
}

// AddMetricsExtraHandler registers handler under path on the http server that serves metrics.
//
// The manager lock is held for the duration of the call. An error is returned,
// and nothing is registered, when:
//   - the manager has already been started,
//   - path equals the builtin metrics endpoint, or
//   - a handler has already been registered for path.
func (cm *controllerManager) AddMetricsExtraHandler(path string, handler http.Handler) error {
	cm.Lock()
	defer cm.Unlock()

	// The guards are evaluated in order; the first one that matches decides the error.
	switch {
	case cm.started:
		return fmt.Errorf("unable to add new metrics handler because metrics endpoint has already been created")
	case path == defaultMetricsEndpoint:
		return fmt.Errorf("overriding builtin %s endpoint is not allowed", defaultMetricsEndpoint)
	}

	if _, alreadyRegistered := cm.metricsExtraHandlers[path]; alreadyRegistered {
		return fmt.Errorf("can't register extra handler by duplicate path %q on metrics http server", path)
	}

	cm.metricsExtraHandlers[path] = handler
	cm.logger.V(2).Info("Registering metrics http server extra handler", "path", path)

	return nil
}

// AddHealthzCheck allows you to add Healthz checker.
func (cm *controllerManager) AddHealthzCheck(name string, check healthz.Checker) error {
cm.Lock()
Expand Down Expand Up @@ -296,27 +269,6 @@ func (cm *controllerManager) GetControllerOptions() config.Controller {
return cm.controllerConfig
}

// addMetricsServer assembles the http server that serves metrics and adds it
// to the manager via cm.add.
//
// The handler built for metrics.Registry is mounted at defaultMetricsEndpoint,
// and every entry of cm.metricsExtraHandlers is mounted at its own path. The
// resulting server is configured to use cm.metricsListener.
func (cm *controllerManager) addMetricsServer() error {
	mux := http.NewServeMux()
	srv := httpserver.New(mux)

	// Scrape failures are surfaced to the client as HTTP errors.
	metricsHandler := promhttp.HandlerFor(metrics.Registry, promhttp.HandlerOpts{
		ErrorHandling: promhttp.HTTPErrorOnError,
	})

	// TODO(JoelSpeed): Use existing Kubernetes machinery for serving metrics
	mux.Handle(defaultMetricsEndpoint, metricsHandler)
	for extraPath, extraHandler := range cm.metricsExtraHandlers {
		mux.Handle(extraPath, extraHandler)
	}

	metricsRunnable := &server{
		Kind:     "metrics",
		Log:      cm.logger.WithValues("path", defaultMetricsEndpoint),
		Server:   srv,
		Listener: cm.metricsListener,
	}

	return cm.add(metricsRunnable)
}

func (cm *controllerManager) addHealthProbeServer() error {
mux := http.NewServeMux()
srv := httpserver.New(mux)
Expand Down Expand Up @@ -410,8 +362,10 @@ func (cm *controllerManager) Start(ctx context.Context) (err error) {
// Metrics should be served whether the controller is leader or not.
// (If we don't serve metrics for non-leaders, prometheus will still scrape
// the pod but will get a connection refused).
if cm.metricsListener != nil {
if err := cm.addMetricsServer(); err != nil {
if cm.metricsServer != nil {
// Note: We are adding the metrics server directly to HTTPServers here as matching on the
// metricsserver.Server interface in cm.runnables.Add would be very brittle.
if err := cm.runnables.HTTPServers.Add(cm.metricsServer, nil); err != nil {
return fmt.Errorf("failed to add metrics server: %w", err)
}
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/manager/internal/integration/manager_test.go
Expand Up @@ -34,6 +34,7 @@ import (
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"

ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -145,7 +146,7 @@ var _ = Describe("manger.Manager Start", func() {
Scheme: scheme,
HealthProbeBindAddress: ":0",
// Disable metrics to avoid port conflicts.
MetricsBindAddress: "0",
Metrics: metricsserver.Options{BindAddress: "0"},
WebhookServer: webhook.NewServer(webhook.Options{
Port: env.WebhookInstallOptions.LocalServingPort,
Host: env.WebhookInstallOptions.LocalServingHost,
Expand Down
40 changes: 13 additions & 27 deletions pkg/manager/manager.go
Expand Up @@ -33,6 +33,7 @@ import (
"k8s.io/client-go/tools/leaderelection/resourcelock"
"k8s.io/client-go/tools/record"
"k8s.io/utils/pointer"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"

"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand All @@ -43,7 +44,6 @@ import (
intrec "sigs.k8s.io/controller-runtime/pkg/internal/recorder"
"sigs.k8s.io/controller-runtime/pkg/leaderelection"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/metrics"
"sigs.k8s.io/controller-runtime/pkg/recorder"
"sigs.k8s.io/controller-runtime/pkg/webhook"
)
Expand All @@ -65,13 +65,6 @@ type Manager interface {
// election was configured.
Elected() <-chan struct{}

// AddMetricsExtraHandler adds an extra handler served on path to the http server that serves metrics.
// Might be useful to register some diagnostic endpoints e.g. pprof. Note that these endpoints are meant to be
// sensitive and shouldn't be exposed publicly.
// If the simple path -> handler mapping offered here is not enough, a new http server/listener should be added as
// Runnable to the manager via Add method.
AddMetricsExtraHandler(path string, handler http.Handler) error

// AddHealthzCheck allows you to add Healthz checker
AddHealthzCheck(name string, check healthz.Checker) error

Expand Down Expand Up @@ -219,10 +212,8 @@ type Options struct {
// between tries of actions. Default is 2 seconds.
RetryPeriod *time.Duration

// MetricsBindAddress is the TCP address that the controller should bind to
// for serving prometheus metrics.
// It can be set to "0" to disable the metrics serving.
MetricsBindAddress string
// Metrics are the metricsserver.Options that will be used to create the metricsserver.Server.
Metrics metricsserver.Options

// HealthProbeBindAddress is the TCP address that the controller should bind to
// for serving health probes
Expand All @@ -243,8 +234,8 @@ type Options struct {
PprofBindAddress string

// WebhookServer is an externally configured webhook.Server. By default,
// a Manager will create a default server using Port, Host, and CertDir;
// if this is set, the Manager will use this server instead.
// a Manager will create a server via webhook.NewServer with default settings.
// If this is set, the Manager will use this server instead.
WebhookServer webhook.Server

// BaseContext is the function that provides Context values to Runnables
Expand Down Expand Up @@ -279,7 +270,7 @@ type Options struct {
// Dependency injection for testing
newRecorderProvider func(config *rest.Config, httpClient *http.Client, scheme *runtime.Scheme, logger logr.Logger, makeBroadcaster intrec.EventBroadcasterProducer) (*intrec.Provider, error)
newResourceLock func(config *rest.Config, recorderProvider recorder.Provider, options leaderelection.Options) (resourcelock.Interface, error)
newMetricsListener func(addr string) (net.Listener, error)
newMetricsServer func(options metricsserver.Options, config *rest.Config, httpClient *http.Client) (metricsserver.Server, error)
newHealthProbeListener func(addr string) (net.Listener, error)
newPprofListener func(addr string) (net.Listener, error)
}
Expand Down Expand Up @@ -383,16 +374,12 @@ func New(config *rest.Config, options Options) (Manager, error) {
}
}

// Create the metrics listener. This will throw an error if the metrics bind
// address is invalid or already in use.
metricsListener, err := options.newMetricsListener(options.MetricsBindAddress)
// Create the metrics server.
metricsServer, err := options.newMetricsServer(options.Metrics, config, cluster.GetHTTPClient())
if err != nil {
return nil, err
}

// By default we have no extra endpoints to expose on metrics http server.
metricsExtraHandlers := make(map[string]http.Handler)

// Create health probes listener. This will return an error if the bind
// address is invalid or already in use.
healthProbeListener, err := options.newHealthProbeListener(options.HealthProbeBindAddress)
Expand All @@ -417,8 +404,7 @@ func New(config *rest.Config, options Options) (Manager, error) {
errChan: errChan,
recorderProvider: recorderProvider,
resourceLock: resourceLock,
metricsListener: metricsListener,
metricsExtraHandlers: metricsExtraHandlers,
metricsServer: metricsServer,
controllerConfig: options.Controller,
logger: options.Logger,
elected: make(chan struct{}),
Expand Down Expand Up @@ -464,8 +450,8 @@ func (o Options) AndFrom(loader config.ControllerManagerConfiguration) (Options,
o.Cache.DefaultNamespaces = map[string]cache.Config{newObj.CacheNamespace: {}}
}

if o.MetricsBindAddress == "" && newObj.Metrics.BindAddress != "" {
o.MetricsBindAddress = newObj.Metrics.BindAddress
if o.Metrics.BindAddress == "" && newObj.Metrics.BindAddress != "" {
o.Metrics.BindAddress = newObj.Metrics.BindAddress
}

if o.HealthProbeBindAddress == "" && newObj.Health.HealthProbeBindAddress != "" {
Expand Down Expand Up @@ -616,8 +602,8 @@ func setOptionsDefaults(options Options) Options {
}
}

if options.newMetricsListener == nil {
options.newMetricsListener = metrics.NewListener
if options.newMetricsServer == nil {
options.newMetricsServer = metricsserver.NewServer
}
leaseDuration, renewDeadline, retryPeriod := defaultLeaseDuration, defaultRenewDeadline, defaultRetryPeriod
if options.LeaseDuration == nil {
Expand Down

0 comments on commit e59161e

Please sign in to comment.