Skip to content

Commit

Permalink
add metric reporting to check-endpoints
Browse files Browse the repository at this point in the history
  • Loading branch information
sanchezl committed Jul 3, 2020
1 parent 0232520 commit 708513a
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 6 deletions.
27 changes: 26 additions & 1 deletion bindata/v4.1.0/kube-apiserver/pod.yaml
Expand Up @@ -171,18 +171,43 @@ spec:
args:
- --kubeconfig
- /etc/kubernetes/static-pod-resources/configmaps/kube-apiserver-cert-syncer-kubeconfig/kubeconfig
- --listen
- 0.0.0.0:17697
- --namespace
- openshift-kube-apiserver
- $(POD_NAMESPACE)
- --v
- '2'
env:
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
volumeMounts:
- mountPath: /etc/kubernetes/static-pod-resources
name: resource-dir
ports:
- name: check-endpoints
hostPort: 17697
containerPort: 17697
protocol: TCP
livenessProbe:
httpGet:
scheme: HTTPS
port: 17697
path: healthz
initialDelaySeconds: 10
timeoutSeconds: 10
readinessProbe:
httpGet:
scheme: HTTPS
port: 17697
path: healthz
initialDelaySeconds: 10
timeoutSeconds: 10
resources:
requests:
memory: 50Mi
Expand Down
2 changes: 1 addition & 1 deletion cmd/cluster-kube-apiserver-operator/main.go
Expand Up @@ -71,7 +71,7 @@ func NewOperatorCommand(ctx context.Context) *cobra.Command {
cmd.AddCommand(regeneratecerts.NewRegenerateCertsCommand())
cmd.AddCommand(certregenerationcontroller.NewCertRegenerationControllerCommand(ctx))
cmd.AddCommand(insecurereadyz.NewInsecureReadyzCommand())
cmd.AddCommand(checkendpoints.New())
cmd.AddCommand(checkendpoints.NewCheckEndpointsCommand())

return cmd
}
6 changes: 3 additions & 3 deletions pkg/cmd/checkendpoints/cmd.go
Expand Up @@ -13,23 +13,23 @@ import (
"github.com/spf13/cobra"
)

func New() *cobra.Command {
func NewCheckEndpointsCommand() *cobra.Command {
config := controllercmd.NewControllerCommandConfig("check-endpoints", version.Get(), func(ctx context.Context, cctx *controllercmd.ControllerContext) error {
operatorcontrolplaneClient := operatorcontrolplaneclient.NewForConfigOrDie(cctx.KubeConfig)
operatorcontrolplaneInformers := operatorcontrolplaneinformers.NewSharedInformerFactoryWithOptions(operatorcontrolplaneClient, 10*time.Minute, operatorcontrolplaneinformers.WithNamespace("openshift-kube-apiserver"))
check := controller.NewPodNetworkConnectivityCheckController(
os.Getenv("POD_NAME"),
"openshift-kube-apiserver",
os.Getenv("POD_NAMESPACE"),
operatorcontrolplaneClient.ControlplaneV1alpha1(),
operatorcontrolplaneInformers.Controlplane().V1alpha1().PodNetworkConnectivityChecks(),
cctx.EventRecorder,
)
controller.RegisterMetrics()
operatorcontrolplaneInformers.Start(ctx.Done())
check.Run(ctx, 1)
<-ctx.Done()
return nil
})
config.DisableServing = true
config.DisableLeaderElection = true
cmd := config.NewCommandWithContext(context.Background())
cmd.Use = "check-endpoints"
Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/checkendpoints/controller/connection_checker.go
Expand Up @@ -131,6 +131,7 @@ func getTCPConnectLatency(ctx context.Context, address string) (*trace.LatencyIn
if err == nil {
conn.Close()
}
updateMetrics(address, latencyInfo, err)
return latencyInfo, err
}

Expand Down
67 changes: 67 additions & 0 deletions pkg/cmd/checkendpoints/controller/metrics.go
@@ -0,0 +1,67 @@
package controller

import (
"sync"

"github.com/openshift/cluster-kube-apiserver-operator/pkg/cmd/checkendpoints/trace"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
)

// registerMetrics ensures the body of RegisterMetrics runs at most once.
var registerMetrics sync.Once

// endpointCheckCounter is incremented once per call to updateMetrics. Its
// "tcpConnect" and "dnsResolve" labels are filled in by
// getCounterMetricLabels with "success", "failure", or the empty string.
var endpointCheckCounter = metrics.NewCounterVec(
	&metrics.CounterOpts{
		Name: "openshift_kube_apiserver_endpoint_check_count",
		Help: "Report status of endpoint checks for each API server pod over time.",
	},
	[]string{"endpoint", "tcpConnect", "dnsResolve"},
)

// tcpConnectLatencyGauge holds, per endpoint, the most recent positive TCP
// connect latency passed to updateMetrics, expressed in nanoseconds.
var tcpConnectLatencyGauge = metrics.NewGaugeVec(
	&metrics.GaugeOpts{
		Name: "openshift_kube_apiserver_endpoint_check_tcp_connect_latency_gauge",
		Help: "Report latency of TCP connect to endpoint for each API server pod over time.",
	},
	[]string{"endpoint"},
)

// dnsResolveLatencyGauge holds, per endpoint, the most recent positive DNS
// resolve latency passed to updateMetrics, expressed in nanoseconds.
var dnsResolveLatencyGauge = metrics.NewGaugeVec(
	&metrics.GaugeOpts{
		Name: "openshift_kube_apiserver_endpoint_check_dns_resolve_latency_gauge",
		Help: "Report latency of DNS resolve of endpoint for each API server pod over time.",
	},
	[]string{"endpoint"},
)

func RegisterMetrics() {
registerMetrics.Do(func() {
legacyregistry.MustRegister(endpointCheckCounter)
legacyregistry.MustRegister(tcpConnectLatencyGauge)
legacyregistry.MustRegister(dnsResolveLatencyGauge)
})
}

// updateMetrics records the outcome of a single endpoint check.
//
// The check counter is always incremented, using the labels computed by
// getCounterMetricLabels. Each latency gauge for the endpoint is updated
// only when the corresponding measured duration is positive; the stored
// value is the duration in nanoseconds.
func updateMetrics(address string, latency *trace.LatencyInfo, checkErr error) {
	counterLabels := getCounterMetricLabels(address, latency, checkErr)
	endpointCheckCounter.With(counterLabels).Inc()

	if connect := latency.Connect; connect > 0 {
		nanos := float64(connect.Nanoseconds())
		tcpConnectLatencyGauge.WithLabelValues(address).Set(nanos)
	}

	if dns := latency.DNS; dns > 0 {
		nanos := float64(dns.Nanoseconds())
		dnsResolveLatencyGauge.WithLabelValues(address).Set(nanos)
	}
}

// getCounterMetricLabels builds the label set for the endpoint check counter.
//
// The "endpoint" label is always the given address. The other two labels are
// derived as follows:
//   - if isDNSError reports true for checkErr, "dnsResolve" is "failure" and
//     "tcpConnect" is left empty;
//   - otherwise "dnsResolve" is "success" when a non-zero DNS latency was
//     recorded (empty when it was zero), and "tcpConnect" is "failure" when
//     checkErr is non-nil or "success" when it is nil.
func getCounterMetricLabels(address string, latency *trace.LatencyInfo, checkErr error) map[string]string {
	var dnsStatus, tcpStatus string

	switch {
	case isDNSError(checkErr):
		// Resolution failed, so no TCP connect outcome is reported.
		dnsStatus = "failure"
	default:
		if latency.DNS != 0 {
			dnsStatus = "success"
		}
		if checkErr == nil {
			tcpStatus = "success"
		} else {
			tcpStatus = "failure"
		}
	}

	return map[string]string{
		"endpoint":   address,
		"dnsResolve": dnsStatus,
		"tcpConnect": tcpStatus,
	}
}
27 changes: 26 additions & 1 deletion pkg/operator/v410_00_assets/bindata.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 708513a

Please sign in to comment.