alternator ttl: add metrics
This patch adds metrics to the Alternator TTL feature (aka the "expiration
service").

I put these metrics deliberately in their own object in ttl.{hh,cc}, and
also with their own prefix ("expiration_*") - and *not* together with the
rest of the Alternator metrics (alternator/stats.{hh,cc}). This is
because we may later want to use the expiration service not only in
Alternator but also in CQL - to support per-item expiration, with CDC
events, in CQL as well. So the implementation of this feature should not
be too entangled with that of Alternator.

The patch currently adds four metrics, and opens the path to easily add
more in the future. The metrics added now are:

1. scylla_expiration_scan_passes: The number of scan passes over the
       entire database. We expect this to grow by 1 every
       alternator_ttl_period_in_seconds seconds.

2. scylla_expiration_scan_table: The number of table scans. In each scan
       pass, we scan all the tables that have the Alternator TTL feature
       enabled. Each scan of each table is counted by this counter.

3. scylla_expiration_items_deleted: Counts the number of items that
       the expiration service expired (deleted). Please remember that
       each item is considered for expiration - and then expired - on
       only one node, so each expired item is counted only once - not
       RF times.

4. scylla_expiration_secondary_ranges_scanned: If this counter is
       incremented, it means this node took over some other node's
       expiration scanning duties while the other node was down.
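
Internally these are plain per-shard uint64_t counters registered through
the Seastar metrics API. For illustration only, here is a condensed,
self-contained sketch of the registration pattern used by the diff below
(two of the four counters shown; not the exact committed code):

    #include <cstdint>
    #include <seastar/core/metrics.hh>

    // Condensed sketch of the per-shard stats object added by this patch
    // (see alternator/ttl.{hh,cc} below). With Scylla's metrics prefix the
    // counters are exported as scylla_expiration_*.
    class stats {
    public:
        stats() {
            namespace sm = seastar::metrics;
            _metrics.add_group("expiration", {
                sm::make_total_operations("scan_passes", scan_passes,
                    sm::description("number of passes over the database")),
                sm::make_total_operations("items_deleted", items_deleted,
                    sm::description("number of items deleted after expiration")),
            });
        }
        uint64_t scan_passes = 0;
        uint64_t items_deleted = 0;
    private:
        // Keeps the metrics registered for as long as this stats object is alive.
        seastar::metrics::metric_groups _metrics;
    };

The counters are incremented directly from the scan code, and the metrics
are unregistered automatically when the stats object (and with it its
metric_groups member) is destroyed.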

This patch also includes a couple of unrelated comment fixes.

I tested the new metrics manually - they aren't yet tested by the
Alternator test suite because I couldn't make up my mind if such
tests would belong in test_ttl.py or test_metrics.py :-)

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220224092419.1132655-1-nyh@scylladb.com>
nyh authored and denesb committed Feb 25, 2022
1 parent ec59f7a commit c262309
Showing 2 changed files with 44 additions and 10 deletions.
36 changes: 26 additions & 10 deletions alternator/ttl.cc
@@ -172,8 +172,6 @@ expiration_service::expiration_service(data_dictionary::database db, service::st
: _db(db)
, _proxy(proxy)
{
//FIXME: add metrics for the service
//setup_metrics();
}

// Convert the big_decimal used to represent expiration time to an integer.
@@ -528,13 +526,14 @@ struct scan_ranges_context {
// Scan data in a list of token ranges in one table, looking for expired
// items and deleting them.
// Because of issue #9167, partition_ranges must have a single partition
// for this code to work correctly.
// range for this code to work correctly.
static future<> scan_table_ranges(
service::storage_proxy& proxy,
const scan_ranges_context& scan_ctx,
dht::partition_range_vector&& partition_ranges,
abort_source& abort_source,
named_semaphore& page_sem)
named_semaphore& page_sem,
expiration_service::stats& expiration_stats)
{
const schema_ptr& s = scan_ctx.s;
assert (partition_ranges.size() == 1); // otherwise issue #9167 will cause incorrect results.
@@ -602,6 +601,7 @@ static future<> scan_table_ranges(
expired = is_expired(n, now);
}
if (expired) {
expiration_stats.items_deleted++;
// FIXME: maybe don't recalculate new_timestamp() all the time
// FIXME: if expire_item() throws on timeout, we need to retry it.
auto ts = api::new_timestamp();
@@ -637,7 +637,8 @@ static future<bool> scan_table(
data_dictionary::database db,
schema_ptr s,
abort_source& abort_source,
named_semaphore& page_sem)
named_semaphore& page_sem,
expiration_service::stats& expiration_stats)
{
// Check if an expiration-time attribute is enabled for this table.
// If not, just return false immediately.
@@ -683,9 +684,8 @@
tlogger.info("table {} TTL column has unsupported type, not scanning", s->cf_name());
co_return false;
}
expiration_stats.scan_table++;
// FIXME: need to pace the scan, not do it all at once.
// FIXME: consider if we should ask the scan without caching?
// can we use cache but not fill it?
scan_ranges_context scan_ctx{s, proxy, std::move(column_name), std::move(member)};
token_ranges_owned_by_this_shard<primary> my_ranges(db.real_database(), s);
while (std::optional<dht::partition_range> range = my_ranges.next_partition_range()) {
@@ -698,7 +698,7 @@
// we fail the entire scan (and rescan from the beginning). Need to
// reconsider this. Saving the scan position might be a good enough
// solution for this problem.
co_await scan_table_ranges(proxy, scan_ctx, std::move(partition_ranges), abort_source, page_sem);
co_await scan_table_ranges(proxy, scan_ctx, std::move(partition_ranges), abort_source, page_sem, expiration_stats);
}
// If each node only scans its own primary ranges, then when any node is
// down part of the token range will not get scanned. This can be viewed
@@ -709,9 +709,10 @@
// on its *secondary* ranges - but only those whose primary owner is down.
token_ranges_owned_by_this_shard<secondary> my_secondary_ranges(db.real_database(), s);
while (std::optional<dht::partition_range> range = my_secondary_ranges.next_partition_range()) {
expiration_stats.secondary_ranges_scanned++;
dht::partition_range_vector partition_ranges;
partition_ranges.push_back(std::move(*range));
co_await scan_table_ranges(proxy, scan_ctx, std::move(partition_ranges), abort_source, page_sem);
co_await scan_table_ranges(proxy, scan_ctx, std::move(partition_ranges), abort_source, page_sem, expiration_stats);
}
co_return true;
}
@@ -738,7 +739,7 @@ future<> expiration_service::run() {
co_return;
}
try {
co_await scan_table(_proxy, _db, s, _abort_source, _page_sem);
co_await scan_table(_proxy, _db, s, _abort_source, _page_sem, _expiration_stats);
} catch (...) {
// The scan of a table may fail in the middle for many
// reasons, including network failure and even the table
@@ -757,6 +758,7 @@
}
}
}
_expiration_stats.scan_passes++;
// The TTL scanner runs above once over all tables, at full steam.
// After completing such a scan, we sleep until it's time to start
// another scan. TODO: If the scan went too fast, we can slow it down
@@ -799,4 +801,18 @@ future<> expiration_service::stop() {
return std::move(*_end);
}

expiration_service::stats::stats() {
_metrics.add_group("expiration", {
seastar::metrics::make_total_operations("scan_passes", scan_passes,
seastar::metrics::description("number of passes over the database")),
seastar::metrics::make_total_operations("scan_table", scan_table,
seastar::metrics::description("number of table scans (counting each scan of each table that enabled expiration)")),
seastar::metrics::make_total_operations("items_deleted", items_deleted,
seastar::metrics::description("number of items deleted after expiration")),
seastar::metrics::make_total_operations("secondary_ranges_scanned", secondary_ranges_scanned,
seastar::metrics::description("number of token ranges scanned by this node while their primary owner was down")),
});
}


} // namespace alternator
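
Putting the ttl.cc changes together, the four counters are incremented at
the following points of the scan flow. This is a much-simplified,
hypothetical condensation for illustration - the real code is
coroutine-based and lives in expiration_service::run(), scan_table() and
scan_table_ranges() above; expiration_stats, table_scan_result and
one_scan_pass() here are made-up stand-ins:

    #include <cstdint>
    #include <vector>

    // Hypothetical stand-ins for illustration only.
    struct expiration_stats {
        uint64_t scan_passes = 0, scan_table = 0,
                 items_deleted = 0, secondary_ranges_scanned = 0;
    };
    struct table_scan_result {
        uint64_t expired_items = 0;     // items found expired in this table
        uint64_t secondary_ranges = 0;  // ranges scanned because their primary
                                        // owner was down
    };

    // One full pass over all TTL-enabled tables, mirroring where the patch
    // increments each counter.
    void one_scan_pass(const std::vector<table_scan_result>& ttl_tables,
                       expiration_stats& stats) {
        for (const auto& t : ttl_tables) {
            stats.scan_table++;                        // once per table per pass
            stats.items_deleted += t.expired_items;    // once per expired item,
                                                       // not RF times
            stats.secondary_ranges_scanned += t.secondary_ranges;
        }
        stats.scan_passes++;                           // once per full pass
    }
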
18 changes: 18 additions & 0 deletions alternator/ttl.hh
@@ -27,6 +27,23 @@ namespace alternator {
// items in all tables with per-item expiration enabled. Currently, this means
// Alternator tables with TTL configured via an UpdateTimeToLive request.
class expiration_service final : public seastar::peering_sharded_service<expiration_service> {
public:
// Object holding per-shard statistics related to the expiration service.
// While this object is alive, these metrics are also registered to be
// visible by the metrics REST API, with the "expiration_" prefix.
class stats {
public:
stats();
uint64_t scan_passes = 0;
uint64_t scan_table = 0;
uint64_t items_deleted = 0;
uint64_t secondary_ranges_scanned = 0;
private:
// The metric_groups object holds this stat object's metrics registered
// as long as the stats object is alive.
seastar::metrics::metric_groups _metrics;
};
private:
data_dictionary::database _db;
service::storage_proxy& _proxy;
// _end is set by start(), and resolves when the background service
@@ -37,6 +54,7 @@ class expiration_service final : public seastar::peering_sharded_service<expirat
// Ensures that at most 1 page of scan results at a time is processed by the TTL service
named_semaphore _page_sem{1, named_semaphore_exception_factory{"alternator_ttl"}};
bool shutting_down() { return _abort_source.abort_requested(); }
stats _expiration_stats;
public:
// sharded_service<expiration_service>::start() creates this object on
// all shards, so calls this constructor on each shard. Later, the
