From 358b64cc7b1bac5742755abfafc2918204cadce4 Mon Sep 17 00:00:00 2001 From: Erik Swanson Date: Fri, 10 Aug 2018 13:42:59 -0700 Subject: [PATCH] Add initial support for stream upstream metrics This includes only the subset of per-upstream/peer metrics that already exist for non-stream upstreams which are applicable to stream/UDP upstreams. There are additional stream/UDP-specific metrics that should be added later, but this is a useful start. Note: The existing non-stream "fails" metric has incorrect help text (it shouldn't be "Active connections"). The stream variant added here does not propagate it; it uses "Number of unsuccessful attempts to communicate with the server" instead, following the NGINX Plus API documentation. Likewise, the stream "received" help text reads "Bytes received from this server" rather than the existing "Bytes received to this server". --- client/nginx_plus.go | 36 ++++++++++++++++++------- collector/nginx_plus.go | 59 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 82 insertions(+), 13 deletions(-) diff --git a/client/nginx_plus.go b/client/nginx_plus.go index 38f0db4eb..1b1ce4bee 100644 --- a/client/nginx_plus.go +++ b/client/nginx_plus.go @@ -62,11 +62,12 @@ type apiError struct { // Stats represents NGINX Plus stats fetched from the NGINX Plus API. // https://nginx.org/en/docs/http/ngx_http_api_module.html type Stats struct { - Connections Connections - HTTPRequests HTTPRequests - SSL SSL - ServerZones ServerZones - Upstreams Upstreams + Connections Connections + HTTPRequests HTTPRequests + SSL SSL + ServerZones ServerZones + StreamUpstreams Upstreams + Upstreams Upstreams } // Connections represents connection related stats. 
@@ -600,12 +601,18 @@ func (client *NginxPlusClient) GetStats() (*Stats, error) { return nil, fmt.Errorf("failed to get stats: %v", err) } + streamUpstreams, err := client.getStreamUpstreams() + if err != nil { + return nil, fmt.Errorf("failed to get stats: %v", err) + } + return &Stats{ - Connections: *cons, - HTTPRequests: *requests, - SSL: *ssl, - ServerZones: *zones, - Upstreams: *upstreams, + Connections: *cons, + HTTPRequests: *requests, + SSL: *ssl, + ServerZones: *zones, + StreamUpstreams: *streamUpstreams, + Upstreams: *upstreams, }, nil } @@ -647,6 +654,15 @@ func (client *NginxPlusClient) getServerZones() (*ServerZones, error) { return &zones, err } +func (client *NginxPlusClient) getStreamUpstreams() (*Upstreams, error) { + var upstreams Upstreams + err := client.get("stream/upstreams", &upstreams) + if err != nil { + return nil, fmt.Errorf("failed to get stream upstreams: %v", err) + } + return &upstreams, nil +} + func (client *NginxPlusClient) getUpstreams() (*Upstreams, error) { var upstreams Upstreams err := client.get("http/upstreams", &upstreams) diff --git a/collector/nginx_plus.go b/collector/nginx_plus.go index 45229fde8..759d8b783 100644 --- a/collector/nginx_plus.go +++ b/collector/nginx_plus.go @@ -10,9 +10,9 @@ import ( // NginxPlusCollector collects NGINX Plus metrics. It implements prometheus.Collector interface. type NginxPlusCollector struct { - nginxClient *client.NginxPlusClient - totalMetrics, serverZoneMetrics, upstreamMetrics, upstreamServerMetrics map[string]*prometheus.Desc - mutex sync.Mutex + nginxClient *client.NginxPlusClient + totalMetrics, serverZoneMetrics, streamUpstreamMetrics, streamUpstreamServerMetrics, upstreamMetrics, upstreamServerMetrics map[string]*prometheus.Desc + mutex sync.Mutex } // NewNginxPlusCollector creates an NginxPlusCollector. 
@@ -42,6 +42,21 @@ func NewNginxPlusCollector(nginxClient *client.NginxPlusClient, namespace string "received": newServerZoneMetric(namespace, "received", "Bytes received from clients", nil), "sent": newServerZoneMetric(namespace, "sent", "Bytes sent to clients", nil), }, + streamUpstreamMetrics: map[string]*prometheus.Desc{ + "zombies": newStreamUpstreamMetric(namespace, "zombies", "Servers removed from the group but still processing active client requests"), + }, + streamUpstreamServerMetrics: map[string]*prometheus.Desc{ + "state": newStreamUpstreamServerMetric(namespace, "state", "Current state", nil), + "active": newStreamUpstreamServerMetric(namespace, "active", "Active connections", nil), + "sent": newStreamUpstreamServerMetric(namespace, "sent", "Bytes sent to this server", nil), + "received": newStreamUpstreamServerMetric(namespace, "received", "Bytes received from this server", nil), + "fails": newStreamUpstreamServerMetric(namespace, "fails", "Number of unsuccessful attempts to communicate with the server", nil), + "unavail": newStreamUpstreamServerMetric(namespace, "unavail", "How many times the server became unavailable for client requests (state 'unavail') due to the number of unsuccessful attempts reaching the max_fails threshold", nil), + "response_time": newStreamUpstreamServerMetric(namespace, "response_time", "Average time to get the full response from the server", nil), + "health_checks_checks": newStreamUpstreamServerMetric(namespace, "health_checks_checks", "Total health check requests", nil), + "health_checks_fails": newStreamUpstreamServerMetric(namespace, "health_checks_fails", "Failed health checks", nil), + "health_checks_unhealthy": newStreamUpstreamServerMetric(namespace, "health_checks_unhealthy", "How many times the server became unhealthy (state 'unhealthy')", nil), + }, upstreamMetrics: map[string]*prometheus.Desc{ "keepalives": newUpstreamMetric(namespace, "keepalives", "Idle keepalive connections"), "zombies": newUpstreamMetric(namespace, "zombies", "Servers removed from the 
group but still processing active client requests"), @@ -138,6 +153,36 @@ func (c *NginxPlusCollector) Collect(ch chan<- prometheus.Metric) { prometheus.CounterValue, float64(zone.Sent), name) } + for name, upstream := range stats.StreamUpstreams { + for _, peer := range upstream.Peers { + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["state"], + prometheus.GaugeValue, upstreamServerStates[peer.State], name, peer.Server) + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["active"], + prometheus.GaugeValue, float64(peer.Active), name, peer.Server) + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["sent"], + prometheus.CounterValue, float64(peer.Sent), name, peer.Server) + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["received"], + prometheus.CounterValue, float64(peer.Received), name, peer.Server) + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["fails"], + prometheus.CounterValue, float64(peer.Fails), name, peer.Server) + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["unavail"], + prometheus.CounterValue, float64(peer.Unavail), name, peer.Server) + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["response_time"], + prometheus.GaugeValue, float64(peer.ResponseTime), name, peer.Server) + + if peer.HealthChecks != (client.HealthChecks{}) { + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["health_checks_checks"], + prometheus.CounterValue, float64(peer.HealthChecks.Checks), name, peer.Server) + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["health_checks_fails"], + prometheus.CounterValue, float64(peer.HealthChecks.Fails), name, peer.Server) + ch <- prometheus.MustNewConstMetric(c.streamUpstreamServerMetrics["health_checks_unhealthy"], + prometheus.CounterValue, float64(peer.HealthChecks.Unhealthy), name, peer.Server) + } + } + ch <- prometheus.MustNewConstMetric(c.streamUpstreamMetrics["zombies"], + 
prometheus.GaugeValue, float64(upstream.Zombies), name) + } + for name, upstream := range stats.Upstreams { for _, peer := range upstream.Peers { ch <- prometheus.MustNewConstMetric(c.upstreamServerMetrics["state"], @@ -198,6 +243,14 @@ func newServerZoneMetric(namespace string, metricName string, docString string, return prometheus.NewDesc(prometheus.BuildFQName(namespace, "server_zone", metricName), docString, []string{"server_zone"}, constLabels) } +func newStreamUpstreamMetric(namespace string, metricName string, docString string) *prometheus.Desc { + return prometheus.NewDesc(prometheus.BuildFQName(namespace, "stream_upstream", metricName), docString, []string{"upstream"}, nil) +} + +func newStreamUpstreamServerMetric(namespace string, metricName string, docString string, constLabels prometheus.Labels) *prometheus.Desc { + return prometheus.NewDesc(prometheus.BuildFQName(namespace, "stream_upstream_server", metricName), docString, []string{"upstream", "server"}, constLabels) +} + func newUpstreamMetric(namespace string, metricName string, docString string) *prometheus.Desc { return prometheus.NewDesc(prometheus.BuildFQName(namespace, "upstream", metricName), docString, []string{"upstream"}, nil) }