Skip to content

Commit

Permalink
Fix open connections metric
Browse files Browse the repository at this point in the history
Co-authored-by: Romain <rtribotte@users.noreply.github.com>
  • Loading branch information
mpl and rtribotte committed Mar 20, 2023
1 parent 598a257 commit 7c2af10
Show file tree
Hide file tree
Showing 17 changed files with 126 additions and 236 deletions.
12 changes: 6 additions & 6 deletions cmd/traefik/traefik.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,14 @@ func setupServer(staticConfiguration *static.Configuration) (*server.Server, err

tsProviders := initTailscaleProviders(staticConfiguration, &providerAggregator)

// Metrics

metricRegistries := registerMetricClients(staticConfiguration.Metrics)
metricsRegistry := metrics.NewMultiRegistry(metricRegistries)

// Entrypoints

serverEntryPointsTCP, err := server.NewTCPEntryPoints(staticConfiguration.EntryPoints, staticConfiguration.HostResolver)
serverEntryPointsTCP, err := server.NewTCPEntryPoints(staticConfiguration.EntryPoints, staticConfiguration.HostResolver, metricsRegistry)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -243,11 +248,6 @@ func setupServer(staticConfiguration *static.Configuration) (*server.Server, err
}
}

// Metrics

metricRegistries := registerMetricClients(staticConfiguration.Metrics)
metricsRegistry := metrics.NewMultiRegistry(metricRegistries)

// Service manager factory

var spiffeX509Source *workloadapi.X509Source
Expand Down
50 changes: 26 additions & 24 deletions docs/content/observability/metrics/overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,59 +16,74 @@ Traefik Proxy hosts an official Grafana dashboard for both [on-premises](https:/

## Global Metrics

| Metric | Type | Description |
|---------------------------------------------|---------|---------------------------------------------------------|
| Config reload total | Count | The total count of configuration reloads. |
| Config reload last success | Gauge | The timestamp of the last configuration reload success. |
| TLS certificates not after | Gauge | The expiration date of certificates. |
| Metric | Type | [Labels](#labels) | Description |
|----------------------------|-------|--------------------------|--------------------------------------------------------------------|
| Config reload total | Count | | The total count of configuration reloads. |
| Config reload last success | Gauge | | The timestamp of the last configuration reload success. |
| Open connections | Gauge | `entrypoint`, `protocol` | The current count of open connections, by entrypoint and protocol. |
| TLS certificates not after | Gauge | | The expiration date of certificates. |

```prom tab="Prometheus"
traefik_config_reloads_total
traefik_config_last_reload_success
traefik_open_connections
traefik_tls_certs_not_after
```

```dd tab="Datadog"
config.reload.total
config.reload.lastSuccessTimestamp
open.connections
tls.certs.notAfterTimestamp
```

```influxdb tab="InfluxDB2"
traefik.config.reload.total
traefik.config.reload.lastSuccessTimestamp
traefik.open.connections
traefik.tls.certs.notAfterTimestamp
```

```statsd tab="StatsD"
# Default prefix: "traefik"
{prefix}.config.reload.total
{prefix}.config.reload.lastSuccessTimestamp
{prefix}.open.connections
{prefix}.tls.certs.notAfterTimestamp
```

```opentelemetry tab="OpenTelemetry"
traefik_config_reloads_total
traefik_config_last_reload_success
traefik_open_connections
traefik_tls_certs_not_after
```

## EntryPoint Metrics
### Labels

Here is a comprehensive list of labels that are provided by the global metrics:

| Label | Description | example |
|---------------|----------------------------------------|----------------------|
| `entrypoint` | Entrypoint that handled the connection | "example_entrypoint" |
| `protocol` | Connection protocol | "TCP" |

## HTTP Metrics

### EntryPoint Metrics

| Metric | Type | [Labels](#labels) | Description |
|-----------------------|-----------|--------------------------------------------|---------------------------------------------------------------------|
| Requests total | Count | `code`, `method`, `protocol`, `entrypoint` | The total count of HTTP requests received by an entrypoint. |
| Requests TLS total | Count | `tls_version`, `tls_cipher`, `entrypoint` | The total count of HTTPS requests received by an entrypoint. |
| Request duration | Histogram | `code`, `method`, `protocol`, `entrypoint` | Request processing duration histogram on an entrypoint. |
| Open connections | Count | `method`, `protocol`, `entrypoint` | The current count of open connections on an entrypoint. |
| Requests bytes total | Count | `code`, `method`, `protocol`, `entrypoint` | The total size of HTTP requests in bytes handled by an entrypoint. |
| Responses bytes total | Count | `code`, `method`, `protocol`, `entrypoint` | The total size of HTTP responses in bytes handled by an entrypoint. |

```prom tab="Prometheus"
traefik_entrypoint_requests_total
traefik_entrypoint_requests_tls_total
traefik_entrypoint_request_duration_seconds
traefik_entrypoint_open_connections
traefik_entrypoint_requests_bytes_total
traefik_entrypoint_responses_bytes_total
```
Expand All @@ -77,7 +92,6 @@ traefik_entrypoint_responses_bytes_total
entrypoint.request.total
entrypoint.request.tls.total
entrypoint.request.duration
entrypoint.connections.open
entrypoint.requests.bytes.total
entrypoint.responses.bytes.total
```
Expand All @@ -86,7 +100,6 @@ entrypoint.responses.bytes.total
traefik.entrypoint.requests.total
traefik.entrypoint.requests.tls.total
traefik.entrypoint.request.duration
traefik.entrypoint.connections.open
traefik.entrypoint.requests.bytes.total
traefik.entrypoint.responses.bytes.total
```
Expand All @@ -96,7 +109,6 @@ traefik.entrypoint.responses.bytes.total
{prefix}.entrypoint.request.total
{prefix}.entrypoint.request.tls.total
{prefix}.entrypoint.request.duration
{prefix}.entrypoint.connections.open
{prefix}.entrypoint.requests.bytes.total
{prefix}.entrypoint.responses.bytes.total
```
Expand All @@ -110,22 +122,20 @@ traefik_entrypoint_requests_bytes_total
traefik_entrypoint_responses_bytes_total
```

## Router Metrics
### Router Metrics

| Metric | Type | [Labels](#labels) | Description |
|-----------------------|-----------|---------------------------------------------------|----------------------------------------------------------------|
| Requests total | Count | `code`, `method`, `protocol`, `router`, `service` | The total count of HTTP requests handled by a router. |
| Requests TLS total | Count | `tls_version`, `tls_cipher`, `router`, `service` | The total count of HTTPS requests handled by a router. |
| Request duration | Histogram | `code`, `method`, `protocol`, `router`, `service` | Request processing duration histogram on a router. |
| Open connections | Count | `method`, `protocol`, `router`, `service` | The current count of open connections on a router. |
| Requests bytes total | Count | `code`, `method`, `protocol`, `router`, `service` | The total size of HTTP requests in bytes handled by a router. |
| Responses bytes total | Count | `code`, `method`, `protocol`, `router`, `service` | The total size of HTTP responses in bytes handled by a router. |

```prom tab="Prometheus"
traefik_router_requests_total
traefik_router_requests_tls_total
traefik_router_request_duration_seconds
traefik_router_open_connections
traefik_router_requests_bytes_total
traefik_router_responses_bytes_total
```
Expand All @@ -134,7 +144,6 @@ traefik_router_responses_bytes_total
router.request.total
router.request.tls.total
router.request.duration
router.connections.open
router.requests.bytes.total
router.responses.bytes.total
```
Expand All @@ -143,7 +152,6 @@ router.responses.bytes.total
traefik.router.requests.total
traefik.router.requests.tls.total
traefik.router.request.duration
traefik.router.connections.open
traefik.router.requests.bytes.total
traefik.router.responses.bytes.total
```
Expand All @@ -153,7 +161,6 @@ traefik.router.responses.bytes.total
{prefix}.router.request.total
{prefix}.router.request.tls.total
{prefix}.router.request.duration
{prefix}.router.connections.open
{prefix}.router.requests.bytes.total
{prefix}.router.responses.bytes.total
```
Expand All @@ -167,14 +174,13 @@ traefik_router_requests_bytes_total
traefik_router_responses_bytes_total
```

## Service Metrics
### Service Metrics

| Metric | Type | Labels | Description |
|-----------------------|-----------|-----------------------------------------|-------------------------------------------------------------|
| Requests total | Count | `code`, `method`, `protocol`, `service` | The total count of HTTP requests processed on a service. |
| Requests TLS total | Count | `tls_version`, `tls_cipher`, `service` | The total count of HTTPS requests processed on a service. |
| Request duration | Histogram | `code`, `method`, `protocol`, `service` | Request processing duration histogram on a service. |
| Open connections | Count | `method`, `protocol`, `service` | The current count of open connections on a service. |
| Retries total | Count | `service` | The count of requests retries on a service. |
| Server UP | Gauge | `service`, `url` | Current service's server status, 0 for a down or 1 for up. |
| Requests bytes total | Count | `code`, `method`, `protocol`, `service` | The total size of requests in bytes received by a service. |
Expand All @@ -184,7 +190,6 @@ traefik_router_responses_bytes_total
traefik_service_requests_total
traefik_service_requests_tls_total
traefik_service_request_duration_seconds
traefik_service_open_connections
traefik_service_retries_total
traefik_service_server_up
traefik_service_requests_bytes_total
Expand All @@ -195,7 +200,6 @@ traefik_service_responses_bytes_total
service.request.total
router.service.tls.total
service.request.duration
service.connections.open
service.retries.total
service.server.up
service.requests.bytes.total
Expand All @@ -206,7 +210,6 @@ service.responses.bytes.total
traefik.service.requests.total
traefik.service.requests.tls.total
traefik.service.request.duration
traefik.service.connections.open
traefik.service.retries.total
traefik.service.server.up
traefik.service.requests.bytes.total
Expand All @@ -218,7 +221,6 @@ traefik.service.responses.bytes.total
{prefix}.service.request.total
{prefix}.service.request.tls.total
{prefix}.service.request.duration
{prefix}.service.connections.open
{prefix}.service.retries.total
{prefix}.service.server.up
{prefix}.service.requests.bytes.total
Expand All @@ -236,7 +238,7 @@ traefik_service_requests_bytes_total
traefik_service_responses_bytes_total
```

## Labels
### Labels

Here is a comprehensive list of labels that are provided by the metrics:

Expand Down
13 changes: 5 additions & 8 deletions pkg/metrics/datadog.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,29 +18,28 @@ var (

// Metric names consistent with https://github.com/DataDog/integrations-extras/pull/64
const (
ddConfigReloadsName = "config.reload.total"
ddLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
ddConfigReloadsName = "config.reload.total"
ddLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
ddOpenConnsName = "open.connections"

ddTLSCertsNotAfterTimestampName = "tls.certs.notAfterTimestamp"

ddEntryPointReqsName = "entrypoint.request.total"
ddEntryPointReqsTLSName = "entrypoint.request.tls.total"
ddEntryPointReqDurationName = "entrypoint.request.duration"
ddEntryPointOpenConnsName = "entrypoint.connections.open"
ddEntryPointReqsBytesName = "entrypoint.requests.bytes.total"
ddEntryPointRespsBytesName = "entrypoint.responses.bytes.total"

ddRouterReqsName = "router.request.total"
ddRouterReqsTLSName = "router.request.tls.total"
ddRouterReqsDurationName = "router.request.duration"
ddRouterOpenConnsName = "router.connections.open"
ddRouterReqsBytesName = "router.requests.bytes.total"
ddRouterRespsBytesName = "router.responses.bytes.total"

ddServiceReqsName = "service.request.total"
ddServiceReqsTLSName = "service.request.tls.total"
ddServiceReqsDurationName = "service.request.duration"
ddServiceRetriesName = "service.retries.total"
ddServiceOpenConnsName = "service.connections.open"
ddServiceServerUpName = "service.server.up"
ddServiceReqsBytesName = "service.requests.bytes.total"
ddServiceRespsBytesName = "service.responses.bytes.total"
Expand All @@ -63,6 +62,7 @@ func RegisterDatadog(ctx context.Context, config *types.Datadog) Registry {
registry := &standardRegistry{
configReloadsCounter: datadogClient.NewCounter(ddConfigReloadsName, 1.0),
lastConfigReloadSuccessGauge: datadogClient.NewGauge(ddLastConfigReloadSuccessName),
openConnectionsGauge: datadogClient.NewGauge(ddOpenConnsName),
tlsCertsNotAfterTimestampGauge: datadogClient.NewGauge(ddTLSCertsNotAfterTimestampName),
}

Expand All @@ -71,7 +71,6 @@ func RegisterDatadog(ctx context.Context, config *types.Datadog) Registry {
registry.entryPointReqsCounter = datadogClient.NewCounter(ddEntryPointReqsName, 1.0)
registry.entryPointReqsTLSCounter = datadogClient.NewCounter(ddEntryPointReqsTLSName, 1.0)
registry.entryPointReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddEntryPointReqDurationName, 1.0), time.Second)
registry.entryPointOpenConnsGauge = datadogClient.NewGauge(ddEntryPointOpenConnsName)
registry.entryPointReqsBytesCounter = datadogClient.NewCounter(ddEntryPointReqsBytesName, 1.0)
registry.entryPointRespsBytesCounter = datadogClient.NewCounter(ddEntryPointRespsBytesName, 1.0)
}
Expand All @@ -81,7 +80,6 @@ func RegisterDatadog(ctx context.Context, config *types.Datadog) Registry {
registry.routerReqsCounter = datadogClient.NewCounter(ddRouterReqsName, 1.0)
registry.routerReqsTLSCounter = datadogClient.NewCounter(ddRouterReqsTLSName, 1.0)
registry.routerReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddRouterReqsDurationName, 1.0), time.Second)
registry.routerOpenConnsGauge = datadogClient.NewGauge(ddRouterOpenConnsName)
registry.routerReqsBytesCounter = datadogClient.NewCounter(ddRouterReqsBytesName, 1.0)
registry.routerRespsBytesCounter = datadogClient.NewCounter(ddRouterRespsBytesName, 1.0)
}
Expand All @@ -92,7 +90,6 @@ func RegisterDatadog(ctx context.Context, config *types.Datadog) Registry {
registry.serviceReqsTLSCounter = datadogClient.NewCounter(ddServiceReqsTLSName, 1.0)
registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddServiceReqsDurationName, 1.0), time.Second)
registry.serviceRetriesCounter = datadogClient.NewCounter(ddServiceRetriesName, 1.0)
registry.serviceOpenConnsGauge = datadogClient.NewGauge(ddServiceOpenConnsName)
registry.serviceServerUpGauge = datadogClient.NewGauge(ddServiceServerUpName)
registry.serviceReqsBytesCounter = datadogClient.NewCounter(ddServiceReqsBytesName, 1.0)
registry.serviceRespsBytesCounter = datadogClient.NewCounter(ddServiceRespsBytesName, 1.0)
Expand Down
Loading

0 comments on commit 7c2af10

Please sign in to comment.