storage_proxy: Make split_stats resilient to being called from different scheduling group

Fixes #11017

When doing writes, the storage proxy creates types deriving from abstract_write_response_handler.
These are created in the various scheduling groups executing the write-inducing code, and they
pick up a group-local reference to the various metrics used by the storage proxy. Normally, all
code using (and especially modifying) these metrics is executed in the same scheduling group.
However, if gossip sees a node go down, it notifies listeners, which eventually calls
get_ep_stat and register_metrics.
Before this patch, this code used the _active_ scheduling group when adding metrics, with a
local dict as a guard against double registrations. If, as described above, we are called in a
different scheduling group than the original one, this can cause double registrations.

Fixed here by keeping a reference to the creating scheduling group and using it, not the
active one, when/if creating new metrics.

Closes #14636
Calle Wilund authored and denesb committed Jul 12, 2023
1 parent c9cb8dc commit 1088c3e
Showing 3 changed files with 41 additions and 4 deletions.
13 changes: 9 additions & 4 deletions service/storage_proxy.cc
@@ -116,8 +116,12 @@ static const seastar::metrics::label op_type_label("op_type");
static const seastar::metrics::label scheduling_group_label("scheduling_group_name");
static const seastar::metrics::label rejected_by_coordinator_label("rejected_by_coordinator");

seastar::metrics::label_instance make_scheduling_group_label(const scheduling_group& sg) {
return scheduling_group_label(sg.name());
}

seastar::metrics::label_instance current_scheduling_group_label() {
return scheduling_group_label(current_scheduling_group().name());
return make_scheduling_group_label(current_scheduling_group());
}

}
@@ -2329,7 +2333,8 @@ storage_proxy_stats::split_stats::split_stats(const sstring& category, const sst
, _long_description_prefix(long_description_prefix)
, _category(category)
, _op_type(op_type)
, _auto_register_metrics(auto_register_metrics) { }
, _auto_register_metrics(auto_register_metrics)
, _sg(current_scheduling_group()) { }

storage_proxy_stats::write_stats::write_stats()
: writes_attempts(COORDINATOR_STATS_CATEGORY, "total_write_attempts", "total number of write requests", "mutation_data")
@@ -2642,7 +2647,7 @@ void storage_proxy_stats::split_stats::register_metrics_local() {

_metrics.add_group(_category, {
sm::make_counter(_short_description_prefix + sstring("_local_node"), [this] { return _local.val; },
sm::description(_long_description_prefix + "on a local Node"), {storage_proxy_stats::current_scheduling_group_label(), op_type_label(_op_type)})
sm::description(_long_description_prefix + "on a local Node"), {storage_proxy_stats::make_scheduling_group_label(_sg), op_type_label(_op_type)})
});
}

@@ -2654,7 +2659,7 @@ void storage_proxy_stats::split_stats::register_metrics_for(sstring dc, gms::ine
if (auto [ignored, added] = _dc_stats.try_emplace(dc); added) {
_metrics.add_group(_category, {
sm::make_counter(_short_description_prefix + sstring("_remote_node"), [this, dc] { return _dc_stats[dc].val; },
sm::description(seastar::format("{} when communicating with external Nodes in another DC", _long_description_prefix)), {storage_proxy_stats::current_scheduling_group_label(), datacenter_label(dc), op_type_label(_op_type)})
sm::description(seastar::format("{} when communicating with external Nodes in another DC", _long_description_prefix)), {storage_proxy_stats::make_scheduling_group_label(_sg), datacenter_label(dc), op_type_label(_op_type)})
.set_skip_when_empty()
});
}
1 change: 1 addition & 0 deletions service/storage_proxy_stats.hh
@@ -44,6 +44,7 @@ private:
// whether to register per-endpoint metrics automatically
bool _auto_register_metrics;

scheduling_group _sg;
public:
/**
* @param category a statistics category, e.g. "client" or "replica"
31 changes: 31 additions & 0 deletions test/boost/storage_proxy_test.cc
@@ -9,6 +9,7 @@

#include <seastar/core/thread.hh>
#include <seastar/testing/test_case.hh>
#include <seastar/testing/thread_test_case.hh>
#include "query-result-writer.hh"

#include "test/lib/cql_test_env.hh"
@@ -103,3 +104,33 @@ SEASTAR_TEST_CASE(test_get_restricted_ranges) {
});
});
}

SEASTAR_THREAD_TEST_CASE(test_split_stats) {
auto ep1 = gms::inet_address("127.0.0.1");
auto sg1 = create_scheduling_group("apa1", 100).get();
auto sg2 = create_scheduling_group("apa2", 100).get();

std::optional<service::storage_proxy_stats::split_stats> stats1, stats2;

// Pretending to be abstract_write_response_handler types:
// created in various scheduling groups, in which they
// instantiate group-local split_stats.
with_scheduling_group(sg1, [&] {
stats1.emplace("tuta", "nils", "en nils", "nilsa", true);
}).get0();

with_scheduling_group(sg2, [&] {
stats2.emplace("tuta", "nils", "en nils", "nilsa", true);
}).get0();

// Simulate storage_proxy::on_down being called from gossip when a
// node drops out. If inside a write operation, we pick up the
// write handlers and call "timeout_cb" on them, which in turn might
// call get_ep_stat, which eventually calls register_metrics for
// the DC written to.
// The point is that either the above should not happen, or
// split_stats should be resilient to being called from a different
// scheduling group.
stats1->register_metrics_for("DC1", ep1);
stats2->register_metrics_for("DC1", ep1);
}
