Skip to content
This repository has been archived by the owner on Nov 14, 2020. It is now read-only.

Commit

Permalink
Improve config for returning metrics per object
Browse files Browse the repository at this point in the history
Since metrics are now aggregated by default, it makes more sense to
invert the setting's meaning: instead of an option that disables
aggregation, expose a positive and explicit action, return_per_object_metrics.

Naming pair: @michaelklishin

Signed-off-by: Gerhard Lazu <gerhard@lazu.co.uk>
  • Loading branch information
gerhard committed Feb 11, 2020
1 parent db3ffc5 commit 8b0c7c4
Show file tree
Hide file tree
Showing 7 changed files with 25 additions and 29 deletions.
2 changes: 1 addition & 1 deletion Makefile
Expand Up @@ -11,7 +11,7 @@ OTP_SHA256 := 4cf44ed12f657c309a2c00e7806f36f56a88e5b74de6814058796561f3842f66

define PROJECT_ENV
[
{enable_metrics_aggregation, true}
{return_per_object_metrics, false}
]
endef

Expand Down
12 changes: 6 additions & 6 deletions README.md
Expand Up @@ -37,7 +37,7 @@ See the entire list of [metrics](metrics.md) exposed via the default port.

This exporter supports the following options via a set of `prometheus.*` configuration keys:

* `prometheus.enable_metrics_aggregation` returns all metrics aggregated (default is `true`). See #26 for more details.
* `prometheus.return_per_object_metrics` returns all metrics per object, unaggregated (default is `false`). See #26 for more details.
* `prometheus.path` defines a scrape endpoint (default is `"/metrics"`).
* `prometheus.tcp.*` controls HTTP listener settings that match [those used by the RabbitMQ HTTP API](https://www.rabbitmq.com/management.html#configuration)
* `prometheus.ssl.*` controls TLS (HTTPS) listener settings that match [those used by the RabbitMQ HTTP API](https://www.rabbitmq.com/management.html#single-listener-https)
Expand All @@ -46,25 +46,25 @@ Sample configuration snippet:

```ini
# these values are defaults
prometheus.enable_metrics_aggregation = true
prometheus.return_per_object_metrics = false
prometheus.path = /metrics
prometheus.tcp.port = 15692
```

When raw metrics are enabled, nodes with 80k queues have been measured to take 58 seconds to return 1.9 million metrics in a 98MB response payload.
When metrics are returned per object, nodes with 80k queues have been measured to take 58 seconds to return 1.9 million metrics in a 98MB response payload.
In order to not put unnecessary pressure on your metrics system, metrics are aggregated by default.

When debugging, it may be useful to enable per-object (unaggregated) metrics.
When debugging, it may be useful to return metrics per object (unaggregated).
This can be enabled on-the-fly, without restarting or configuring RabbitMQ, using the following command:

```
rabbitmqctl eval 'application:set_env(rabbitmq_prometheus, enable_metric_aggregation, true).'
rabbitmqctl eval 'application:set_env(rabbitmq_prometheus, return_per_object_metrics, true).'
```

To go back to aggregated metrics on-the-fly, run the following command:

```
rabbitmqctl eval 'application:set_env(rabbitmq_prometheus, enable_metric_aggregation, false).'
rabbitmqctl eval 'application:set_env(rabbitmq_prometheus, return_per_object_metrics, false).'
```


Expand Down
4 changes: 2 additions & 2 deletions docker/rabbitmq-overview.conf
Expand Up @@ -27,6 +27,6 @@ collect_statistics_interval = 10000
# https://github.com/rabbitmq/rabbitmq-management/pull/707
# management.disable_stats = true

# Aggregate all metrics
# Return per-object metrics (unaggregated)
# https://github.com/rabbitmq/rabbitmq-prometheus/pull/28
# prometheus.enable_metrics_aggregation = true
# prometheus.return_per_object_metrics = true
4 changes: 2 additions & 2 deletions docker/rabbitmq-qq.conf
Expand Up @@ -27,6 +27,6 @@ collect_statistics_interval = 10000
# Enable debugging
# log.console.level = debug

# Aggregate all metrics
# Return per-object metrics (unaggregated)
# https://github.com/rabbitmq/rabbitmq-prometheus/pull/28
# prometheus.enable_metrics_aggregation = true
prometheus.return_per_object_metrics = true
4 changes: 2 additions & 2 deletions priv/schema/rabbitmq_prometheus.schema
Expand Up @@ -4,8 +4,8 @@
%% See https://rabbitmq.com/prometheus.html for details
%% ----------------------------------------------------------------------------

%% Option to enable metrics aggregation
{mapping, "prometheus.enable_metrics_aggregation", "rabbitmq_prometheus.enable_metrics_aggregation",
%% Option to return metrics per-object, unaggregated
{mapping, "prometheus.return_per_object_metrics", "rabbitmq_prometheus.return_per_object_metrics",
[{datatype, {enum, [true, false]}}]}.

%% Endpoint path
Expand Down
18 changes: 7 additions & 11 deletions src/collectors/prometheus_rabbitmq_core_metrics_collector.erl
Expand Up @@ -219,9 +219,9 @@ register() ->
%% No-op cleanup hook: the argument is ignored and `ok` is always returned.
deregister_cleanup(_Registry) ->
    ok.

collect_mf(_Registry, Callback) ->
{ok, Enable} = application:get_env(rabbitmq_prometheus, enable_metrics_aggregation),
{ok, PerObjectMetrics} = application:get_env(rabbitmq_prometheus, return_per_object_metrics),
[begin
Data = get_data(Table, Enable),
Data = get_data(Table, PerObjectMetrics),
mf(Callback, Contents, Data)
end || {Table, Contents} <- ?METRICS_RAW],
[begin
Expand Down Expand Up @@ -375,7 +375,7 @@ emit_gauge_metric_if_defined(Labels, Value) ->
gauge_metric(Labels, Value)
end.

get_data(connection_metrics = Table, true) ->
get_data(connection_metrics = Table, false) ->
{Table, A1, A2, A3, A4} = ets:foldl(fun({_, Props}, {T, A1, A2, A3, A4}) ->
{T,
sum(proplists:get_value(recv_cnt, Props), A1),
Expand All @@ -384,9 +384,7 @@ get_data(connection_metrics = Table, true) ->
sum(proplists:get_value(channels, Props), A4)}
end, empty(Table), Table),
[{Table, [{recv_cnt, A1}, {send_cnt, A2}, {send_pend, A3}, {channels, A4}]}];


get_data(channel_metrics = Table, true) ->
get_data(channel_metrics = Table, false) ->
{Table, A1, A2, A3, A4, A5, A6, A7} =
ets:foldl(fun({_, Props}, {T, A1, A2, A3, A4, A5, A6, A7}) ->
{T,
Expand All @@ -401,8 +399,7 @@ get_data(channel_metrics = Table, true) ->
[{Table, [{consumer_count, A1}, {messages_unacknowledged, A2}, {messages_unconfirmed, A3},
{messages_uncommitted, A4}, {acks_uncommitted, A5}, {prefetch_count, A6},
{global_prefetch_count, A7}]}];

get_data(queue_metrics = Table, true) ->
get_data(queue_metrics = Table, false) ->
{Table, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16} =
ets:foldl(fun({_, Props, _}, {T, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10,
A11, A12, A13, A14, A15, A16}) ->
Expand Down Expand Up @@ -432,8 +429,7 @@ get_data(queue_metrics = Table, true) ->
{message_bytes_ready, A11}, {message_bytes_unacknowledged, A12},
{messages_paged_out, A13}, {message_bytes_paged_out, A14},
{disk_reads, A15}, {disk_writes, A16}]}];

get_data(Table, true) when Table == channel_exchange_metrics;
get_data(Table, false) when Table == channel_exchange_metrics;
Table == queue_coarse_metrics;
Table == channel_queue_metrics;
Table == connection_coarse_metrics;
Expand Down Expand Up @@ -470,7 +466,7 @@ get_data(Table, _) ->
division(0, 0) ->
0;
division(A, B) ->
A/B.
A / B.

%% Folds one sample into a running `{Count, Sum}` accumulator:
%% the count is incremented by one and the sample is added to the sum.
accumulate_count_and_sum(Sample, {SeenSoFar, RunningTotal}) ->
    NewCount = SeenSoFar + 1,
    NewTotal = RunningTotal + Sample,
    {NewCount, NewTotal}.
Expand Down
10 changes: 5 additions & 5 deletions test/rabbit_prometheus_http_SUITE.erl
Expand Up @@ -29,7 +29,7 @@ all() ->
{group, config_path},
{group, config_port},
{group, aggregated_metrics},
{group, individual_metrics}
{group, per_object_metrics}
].

groups() ->
Expand All @@ -38,7 +38,7 @@ groups() ->
{config_path, [], all_tests()},
{config_port, [], all_tests()},
{aggregated_metrics, [], [aggregated_metrics_test, build_info_test, identity_info_test]},
{individual_metrics, [], [individual_metrics_test, build_info_test, identity_info_test]}
{per_object_metrics, [], [per_object_metrics_test, build_info_test, identity_info_test]}
].

all_tests() ->
Expand All @@ -62,8 +62,8 @@ init_per_group(config_port, Config0) ->
PathConfig = {rabbitmq_prometheus, [{tcp_config, [{port, 15772}]}]},
Config1 = rabbit_ct_helpers:merge_app_env(Config0, PathConfig),
init_per_group(config_port, Config1, [{prometheus_port, 15772}]);
init_per_group(individual_metrics, Config0) ->
PathConfig = {rabbitmq_prometheus, [{enable_metrics_aggregation, false}]},
init_per_group(per_object_metrics, Config0) ->
PathConfig = {rabbitmq_prometheus, [{return_per_object_metrics, true}]},
Config1 = rabbit_ct_helpers:merge_app_env(Config0, PathConfig),
init_per_group(aggregated_metrics, Config1);
init_per_group(aggregated_metrics, Config0) ->
Expand Down Expand Up @@ -209,7 +209,7 @@ aggregated_metrics_test(Config) ->
%% Checking raft_entry_commit_latency_seconds because we are aggregating it
?assertEqual(match, re:run(Body, "^rabbitmq_raft_entry_commit_latency_seconds ", [{capture, none}, multiline])).

individual_metrics_test(Config) ->
per_object_metrics_test(Config) ->
{_Headers, Body} = http_get(Config, [], 200),
%% Checking that the body looks like a valid response
ct:pal(Body),
Expand Down

0 comments on commit 8b0c7c4

Please sign in to comment.