Improve config for returning metrics per object

Since metrics are now aggregated by default, it makes more sense to invert the option: rather than disabling aggregation, users take a positive, explicit action and enable return_per_object_metrics.

Naming pair: @michaelklishin
Signed-off-by: Gerhard Lazu <gerhard@lazu.co.uk>
rabbitmq · Feb 11, 2020 · 8b0c7c4 · 8b0c7c4
1 parent db3ffc5
commit 8b0c7c4
Show file tree

Hide file tree

Showing 7 changed files with 25 additions and 29 deletions.
diff --git a/Makefile b/Makefile
@@ -11,7 +11,7 @@ OTP_SHA256 := 4cf44ed12f657c309a2c00e7806f36f56a88e5b74de6814058796561f3842f66
 
 define PROJECT_ENV
 [
-	{enable_metrics_aggregation, true}
+	{return_per_object_metrics, false}
 ]
 endef
 

diff --git a/README.md b/README.md
@@ -37,7 +37,7 @@ See the entire list of [metrics](metrics.md) exposed via the default port.
 
 This exporter supports the following options via a set of `prometheus.*` configuration keys:
 
- * `prometheus.enable_metrics_aggregation` returns all metrics aggregated (default is `true`). See #26 for more details.
+ * `prometheus.return_per_object_metrics`, when set to `true`, returns metrics per object (unaggregated) instead of aggregated (default is `false`). See #26 for more details.
  * `prometheus.path` defines a scrape endpoint (default is `"/metrics"`).
  * `prometheus.tcp.*` controls HTTP listener settings that match [those used by the RabbitMQ HTTP API](https://www.rabbitmq.com/management.html#configuration)
  * `prometheus.ssl.*` controls TLS (HTTPS) listener settings that match [those used by the RabbitMQ HTTP API](https://www.rabbitmq.com/management.html#single-listener-https)
@@ -46,25 +46,25 @@ Sample configuration snippet:
 
 ```ini
 # these values are defaults
-prometheus.enable_metrics_aggregation = true
+prometheus.return_per_object_metrics = false
 prometheus.path = /metrics
 prometheus.tcp.port =  15692
 ```
 
-When raw metrics are enabled, nodes with 80k queues have been measured to take 58 seconds to return 1.9 million metrics in a 98MB response payload.
+When metrics are returned per object, nodes with 80k queues have been measured to take 58 seconds to return 1.9 million metrics in a 98MB response payload.
 In order to not put unnecessary pressure on your metrics system, metrics are aggregated by default.
 
-When debugging, it may be useful to enable per-object (unaggregated) metrics.
+When debugging, it may be useful to return metrics per object (unaggregated).
 This can be enabled on-the-fly, without restarting or configuring RabbitMQ, using the following command:
 
 ```
-rabbitmqctl eval 'application:set_env(rabbitmq_prometheus, enable_metric_aggregation, true).'
+rabbitmqctl eval 'application:set_env(rabbitmq_prometheus, return_per_object_metrics, true).'
 ```
 
 To go back to aggregated metrics on-the-fly, run the following command:
 
 ```
-rabbitmqctl eval 'application:set_env(rabbitmq_prometheus, enable_metric_aggregation, false).'
+rabbitmqctl eval 'application:set_env(rabbitmq_prometheus, return_per_object_metrics, false).'
 ```
 
 

diff --git a/docker/rabbitmq-overview.conf b/docker/rabbitmq-overview.conf
@@ -27,6 +27,6 @@ collect_statistics_interval = 10000
 # https://github.com/rabbitmq/rabbitmq-management/pull/707
 # management.disable_stats = true
 
-# Aggregate all metrics
+# Return per-object metrics (unaggregated)
 # https://github.com/rabbitmq/rabbitmq-prometheus/pull/28
-# prometheus.enable_metrics_aggregation = true
+# prometheus.return_per_object_metrics = true
diff --git a/docker/rabbitmq-qq.conf b/docker/rabbitmq-qq.conf
@@ -27,6 +27,6 @@ collect_statistics_interval = 10000
 # Enable debugging
 # log.console.level = debug
 
-# Aggregate all metrics
+# Return per-object metrics (unaggregated)
 # https://github.com/rabbitmq/rabbitmq-prometheus/pull/28
-# prometheus.enable_metrics_aggregation = true
+prometheus.return_per_object_metrics = true
diff --git a/priv/schema/rabbitmq_prometheus.schema b/priv/schema/rabbitmq_prometheus.schema
@@ -4,8 +4,8 @@
 %% See https://rabbitmq.com/prometheus.html for details
 %% ----------------------------------------------------------------------------
 
-%% Option to enable metrics aggregation
-{mapping, "prometheus.enable_metrics_aggregation", "rabbitmq_prometheus.enable_metrics_aggregation",
+%% Option to return metrics per-object, unaggregated
+{mapping, "prometheus.return_per_object_metrics", "rabbitmq_prometheus.return_per_object_metrics",
     [{datatype, {enum, [true, false]}}]}.
 
 %% Endpoint path

diff --git a/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl b/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl
@@ -219,9 +219,9 @@ register() ->
 deregister_cleanup(_) -> ok.
 
 collect_mf(_Registry, Callback) ->
-    {ok, Enable} = application:get_env(rabbitmq_prometheus, enable_metrics_aggregation),
+    {ok, PerObjectMetrics} = application:get_env(rabbitmq_prometheus, return_per_object_metrics),
     [begin
-         Data = get_data(Table, Enable),
+         Data = get_data(Table, PerObjectMetrics),
          mf(Callback, Contents, Data)
      end || {Table, Contents} <- ?METRICS_RAW],
     [begin
@@ -375,7 +375,7 @@ emit_gauge_metric_if_defined(Labels, Value) ->
       gauge_metric(Labels, Value)
   end.
 
-get_data(connection_metrics = Table, true) ->
+get_data(connection_metrics = Table, false) ->
     {Table, A1, A2, A3, A4} = ets:foldl(fun({_, Props}, {T, A1, A2, A3, A4}) ->
                                             {T,
                                              sum(proplists:get_value(recv_cnt, Props), A1),
@@ -384,9 +384,7 @@ get_data(connection_metrics = Table, true) ->
                                              sum(proplists:get_value(channels, Props), A4)}
                                     end, empty(Table), Table),
     [{Table, [{recv_cnt, A1}, {send_cnt, A2}, {send_pend, A3}, {channels, A4}]}];
-
-
-get_data(channel_metrics = Table, true) ->
+get_data(channel_metrics = Table, false) ->
     {Table, A1, A2, A3, A4, A5, A6, A7} =
         ets:foldl(fun({_, Props}, {T, A1, A2, A3, A4, A5, A6, A7}) ->
                           {T,
@@ -401,8 +399,7 @@ get_data(channel_metrics = Table, true) ->
      [{Table, [{consumer_count, A1}, {messages_unacknowledged, A2}, {messages_unconfirmed, A3},
                {messages_uncommitted, A4}, {acks_uncommitted, A5}, {prefetch_count, A6},
                {global_prefetch_count, A7}]}];
-
-get_data(queue_metrics = Table, true) ->
+get_data(queue_metrics = Table, false) ->
     {Table, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16} =
         ets:foldl(fun({_, Props, _}, {T, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10,
                                       A11, A12, A13, A14, A15, A16}) ->
@@ -432,8 +429,7 @@ get_data(queue_metrics = Table, true) ->
                {message_bytes_ready, A11}, {message_bytes_unacknowledged, A12},
                {messages_paged_out, A13}, {message_bytes_paged_out, A14},
                {disk_reads, A15}, {disk_writes, A16}]}];
-
-get_data(Table, true) when Table == channel_exchange_metrics;
+get_data(Table, false) when Table == channel_exchange_metrics;
                            Table == queue_coarse_metrics;
                            Table == channel_queue_metrics;
                            Table == connection_coarse_metrics;
@@ -470,7 +466,7 @@ get_data(Table, _) ->
 division(0, 0) ->
     0;
 division(A, B) ->
-    A/B.
+    A / B.
 
 accumulate_count_and_sum(Value, {Count, Sum}) ->
     {Count + 1, Sum + Value}.

diff --git a/test/rabbit_prometheus_http_SUITE.erl b/test/rabbit_prometheus_http_SUITE.erl
@@ -29,7 +29,7 @@ all() ->
      {group, config_path},
      {group, config_port},
      {group, aggregated_metrics},
-     {group, individual_metrics}
+     {group, per_object_metrics}
     ].
 
 groups() ->
@@ -38,7 +38,7 @@ groups() ->
      {config_path, [], all_tests()},
      {config_port, [], all_tests()},
      {aggregated_metrics, [], [aggregated_metrics_test, build_info_test, identity_info_test]},
-     {individual_metrics, [], [individual_metrics_test, build_info_test, identity_info_test]}
+     {per_object_metrics, [], [per_object_metrics_test, build_info_test, identity_info_test]}
     ].
 
 all_tests() ->
@@ -62,8 +62,8 @@ init_per_group(config_port, Config0) ->
     PathConfig = {rabbitmq_prometheus, [{tcp_config, [{port, 15772}]}]},
     Config1 = rabbit_ct_helpers:merge_app_env(Config0, PathConfig),
     init_per_group(config_port, Config1, [{prometheus_port, 15772}]);
-init_per_group(individual_metrics, Config0) ->
-    PathConfig = {rabbitmq_prometheus, [{enable_metrics_aggregation, false}]},
+init_per_group(per_object_metrics, Config0) ->
+    PathConfig = {rabbitmq_prometheus, [{return_per_object_metrics, true}]},
     Config1 = rabbit_ct_helpers:merge_app_env(Config0, PathConfig),
     init_per_group(aggregated_metrics, Config1);
 init_per_group(aggregated_metrics, Config0) ->
@@ -209,7 +209,7 @@ aggregated_metrics_test(Config) ->
     %% Checking raft_entry_commit_latency_seconds because we are aggregating it
     ?assertEqual(match, re:run(Body, "^rabbitmq_raft_entry_commit_latency_seconds ", [{capture, none}, multiline])).
 
-individual_metrics_test(Config) ->
+per_object_metrics_test(Config) ->
     {_Headers, Body} = http_get(Config, [], 200),
     %% Checking that the body looks like a valid response
     ct:pal(Body),