redpanda-data · dotnwat · Jun 15, 2021 · May 28, 2021 · May 28, 2021 · May 28, 2021
diff --git a/src/v/kafka/server/connection_context.cc b/src/v/kafka/server/connection_context.cc
@@ -115,7 +115,8 @@ ss::future<> connection_context::handle_auth_v0(const size_t size) {
           std::move(request_buf),
           0s);
         auto resp = co_await kafka::process_request(
-          std::move(ctx), _proto.smp_group());
+                      std::move(ctx), _proto.smp_group())
+                      .response;
         auto data = std::move(*resp).release();
         response.decode(std::move(data), version);
     }
@@ -224,16 +225,14 @@ connection_context::dispatch_method_once(request_header hdr, size_t size) {
                   std::move(buf),
                   sres.backpressure_delay);
                 /*
-                 * until authentication is complete, process requests in order
-                 * since all subsequent requests are dependent on authentication
-                 * having completed.
+                 * we process requests in order since all subsequent requests
+                 * are dependent on authentication having completed.
                  *
-                 * the other important reason for disabling pipeling before
-                 * authentication completes is because when a sasl handshake
-                 * with version=0 is processed, the next data on the wire is
-                 * _not_ another request: it is a size-prefixed authentication
-                 * payload without a request envelope, and requires special
-                 * handling.
+                 * the other important reason for disabling pipeling is because
+                 * when a sasl handshake with version=0 is processed, the next
+                 * data on the wire is _not_ another request: it is a
+                 * size-prefixed authentication payload without a request
+                 * envelope, and requires special handling.
                  *
                  * a well behaved client should implicitly provide a data stream
                  * that invokes this behavior in the server: that is, it won't
@@ -245,47 +244,52 @@ connection_context::dispatch_method_once(request_header hdr, size_t size) {
                  * stream at best and at worst some odd behavior.
                  */
 
-                if (unlikely(!sasl().complete())) {
-                    return do_process(std::move(rctx))
-                      .handle_exception([self](std::exception_ptr e) {
-                          vlog(
-                            klog.info,
-                            "Detected error processing request: {}",
-                            e);
-                          self->_rs.conn->shutdown_input();
-                      })
-                      .finally([s = std::move(sres), self] {});
-                }
-
-                (void)ss::try_with_gate(
-                  _rs.conn_gate(),
-                  [this, rctx = std::move(rctx)]() mutable {
-                      return do_process(std::move(rctx));
+                const auto correlation = rctx.header().correlation;
+                const sequence_id seq = _seq_idx;
+                _seq_idx = _seq_idx + sequence_id(1);
+                auto res = kafka::process_request(
+                  std::move(rctx), _proto.smp_group());
+                /**
+                 * first stage processed in a foreground.
+                 */
+                return res.dispatched
+                  .then([this,
+                         f = std::move(res.response),
+                         seq,
+                         correlation,
+                         self,
+                         s = std::move(sres)]() mutable {
+                      /**
+                       * second stage processed in background.
+                       */
+                      (void)ss::try_with_gate(
+                        _rs.conn_gate(),
+                        [this, f = std::move(f), seq, correlation]() mutable {
+                            return f.then(
+                              [this, seq, correlation](response_ptr r) mutable {
+                                  r->set_correlation(correlation);
+                                  _responses.insert({seq, std::move(r)});
+                                  return process_next_response();
+                              });
+                        })
+                        .handle_exception([self](std::exception_ptr e) {
+                            vlog(
+                              klog.info,
+                              "Detected error processing request: {}",
+                              e);
+                            self->_rs.conn->shutdown_input();
+                        })
+                        .finally([s = std::move(s), self] {});
                   })
                   .handle_exception([self](std::exception_ptr e) {
                       vlog(
                         klog.info, "Detected error processing request: {}", e);
                       self->_rs.conn->shutdown_input();
-                  })
-                  .finally([s = std::move(sres), self] {});
-
-                return ss::now();
+                  });
             });
       });
 }
 
-ss::future<> connection_context::do_process(request_context ctx) {
-    const auto correlation = ctx.header().correlation;
-    const sequence_id seq = _seq_idx;
-    _seq_idx = _seq_idx + sequence_id(1);
-    return kafka::process_request(std::move(ctx), _proto.smp_group())
-      .then([this, seq, correlation](response_ptr r) mutable {
-          r->set_correlation(correlation);
-          _responses.insert({seq, std::move(r)});
-          return process_next_response();
-      });
-}
-
 ss::future<> connection_context::process_next_response() {
     return ss::repeat([this]() mutable {
         auto it = _responses.find(_next_response);

diff --git a/src/v/kafka/server/group.cc b/src/v/kafka/server/group.cc
@@ -1600,8 +1600,7 @@ group::store_txn_offsets(txn_offset_commit_request r) {
     co_return txn_offset_commit_response(r, error_code::none);
 }
 
-ss::future<offset_commit_response>
-group::store_offsets(offset_commit_request&& r) {
+group::offset_commit_stages group::store_offsets(offset_commit_request&& r) {
     cluster::simple_batch_builder builder(
       model::record_batch_type::raft_data, model::offset(0));
 
@@ -1642,12 +1641,13 @@ group::store_offsets(offset_commit_request&& r) {
     auto batch = std::move(builder).build();
     auto reader = model::make_memory_record_batch_reader(std::move(batch));
 
-    return _partition
-      ->replicate(
-        std::move(reader),
-        raft::replicate_options(raft::consistency_level::quorum_ack))
-      .then([this, req = std::move(r), commits = std::move(offset_commits)](
-              result<raft::replicate_result> r) mutable {
+    auto replicate_stages = _partition->replicate_in_stages(
+      std::move(reader),
+      raft::replicate_options(raft::consistency_level::quorum_ack));
+
+    auto f = replicate_stages.replicate_finished.then(
+      [this, req = std::move(r), commits = std::move(offset_commits)](
+        result<raft::replicate_result> r) mutable {
           error_code error = r ? error_code::none : error_code::not_coordinator;
           if (in_state(group_state::dead)) {
               return offset_commit_response(req, error);
@@ -1666,6 +1666,8 @@ group::store_offsets(offset_commit_request&& r) {
 
           return offset_commit_response(req, error);
       });
+    return offset_commit_stages(
+      std::move(replicate_stages.request_enqueued), std::move(f));
 }
 
 ss::future<cluster::commit_group_tx_reply>
@@ -1780,22 +1782,22 @@ group::handle_abort_tx(cluster::abort_group_tx_request r) {
     }
 }
 
-ss::future<offset_commit_response>
+group::offset_commit_stages
 group::handle_offset_commit(offset_commit_request&& r) {
     if (in_state(group_state::dead)) {
-        return ss::make_ready_future<offset_commit_response>(
+        return offset_commit_stages(
           offset_commit_response(r, error_code::coordinator_not_available));
 
     } else if (r.data.generation_id < 0 && in_state(group_state::empty)) {
         // <kafka>The group is only using Kafka to store offsets.</kafka>
         return store_offsets(std::move(r));
 
     } else if (!contains_member(r.data.member_id)) {
-        return ss::make_ready_future<offset_commit_response>(
+        return offset_commit_stages(
           offset_commit_response(r, error_code::unknown_member_id));
 
     } else if (r.data.generation_id != generation()) {
-        return ss::make_ready_future<offset_commit_response>(
+        return offset_commit_stages(
           offset_commit_response(r, error_code::illegal_generation));
     } else if (
       in_state(group_state::stable)
@@ -1807,12 +1809,13 @@ group::handle_offset_commit(offset_commit_request&& r) {
         schedule_next_heartbeat_expiration(member);
         return store_offsets(std::move(r));
     } else if (in_state(group_state::completing_rebalance)) {
-        return ss::make_ready_future<offset_commit_response>(
+        return offset_commit_stages(
           offset_commit_response(r, error_code::rebalance_in_progress));
     } else {
-        return ss::make_exception_future<offset_commit_response>(
-          std::runtime_error(
-            fmt::format("Unexpected group state {} for {}", _state, *this)));
+        return offset_commit_stages(
+          ss::now(),
+          ss::make_exception_future<offset_commit_response>(std::runtime_error(
+            fmt::format("Unexpected group state {} for {}", _state, *this))));
     }
 }
 

diff --git a/src/v/kafka/server/group.h b/src/v/kafka/server/group.h
@@ -14,6 +14,7 @@
 #include "cluster/partition.h"
 #include "cluster/tx_utils.h"
 #include "kafka/protocol/fwd.h"
+#include "kafka/protocol/offset_commit.h"
 #include "kafka/server/logger.h"
 #include "kafka/server/member.h"
 #include "kafka/types.h"
@@ -114,6 +115,21 @@ class group {
     static constexpr int8_t commit_tx_record_version{0};
     static constexpr int8_t aborted_tx_record_version{0};
 
+    struct offset_commit_stages {
+        explicit offset_commit_stages(offset_commit_response resp)
+          : dispatched(ss::now())
+          , committed(
+              ss::make_ready_future<offset_commit_response>(std::move(resp))) {}
+
+        offset_commit_stages(
+          ss::future<> dispatched, ss::future<offset_commit_response> resp)
+          : dispatched(std::move(dispatched))
+          , committed(std::move(resp)) {}
+
+        ss::future<> dispatched;
+        ss::future<offset_commit_response> committed;
+    };
+
     struct offset_metadata {
         model::offset log_offset;
         model::offset offset;
@@ -430,7 +446,7 @@ class group {
     ss::future<txn_offset_commit_response>
     store_txn_offsets(txn_offset_commit_request r);
 
-    ss::future<offset_commit_response> store_offsets(offset_commit_request&& r);
+    offset_commit_stages store_offsets(offset_commit_request&& r);
 
     ss::future<txn_offset_commit_response>
     handle_txn_offset_commit(txn_offset_commit_request r);
@@ -444,8 +460,7 @@ class group {
     ss::future<cluster::abort_group_tx_reply>
     handle_abort_tx(cluster::abort_group_tx_request r);
 
-    ss::future<offset_commit_response>
-    handle_offset_commit(offset_commit_request&& r);
+    offset_commit_stages handle_offset_commit(offset_commit_request&& r);
 
     ss::future<cluster::commit_group_tx_reply>
     handle_commit_tx(cluster::commit_group_tx_request r);

diff --git a/src/v/kafka/server/group_manager.cc b/src/v/kafka/server/group_manager.cc
@@ -856,13 +856,12 @@ group_manager::abort_tx(cluster::abort_group_tx_request&& r) {
       });
 }
 
-ss::future<offset_commit_response>
+group::offset_commit_stages
 group_manager::offset_commit(offset_commit_request&& r) {
     auto error = validate_group_status(
       r.ntp, r.data.group_id, offset_commit_api::key);
     if (error != error_code::none) {
-        return ss::make_ready_future<offset_commit_response>(
-          offset_commit_response(r, error));
+        return group::offset_commit_stages(offset_commit_response(r, error));
     }
 
     auto group = get_group(r.data.group_id);
@@ -878,7 +877,7 @@ group_manager::offset_commit(offset_commit_request&& r) {
         } else {
             // <kafka>or this is a request coming from an older generation.
             // either way, reject the commit</kafka>
-            return ss::make_ready_future<offset_commit_response>(
+            return group::offset_commit_stages(
               offset_commit_response(r, error_code::illegal_generation));
         }
     }

diff --git a/src/v/kafka/server/group_manager.h b/src/v/kafka/server/group_manager.h
@@ -135,8 +135,7 @@ class group_manager {
     ss::future<leave_group_response> leave_group(leave_group_request&& request);
 
     /// \brief Handle a OffsetCommit request
-    ss::future<offset_commit_response>
-    offset_commit(offset_commit_request&& request);
+    group::offset_commit_stages offset_commit(offset_commit_request&& request);
 
     ss::future<txn_offset_commit_response>
     txn_offset_commit(txn_offset_commit_request&& request);

diff --git a/src/v/kafka/server/group_router.h b/src/v/kafka/server/group_router.h
@@ -29,7 +29,9 @@
 #include <seastar/core/reactor.hh>
 #include <seastar/core/scheduling.hh>
 #include <seastar/core/sharded.hh>
+#include <seastar/core/smp.hh>
 
+#include <exception>
 #include <type_traits>
 
 namespace kafka {
@@ -129,8 +131,59 @@ class group_router final {
         return route(std::move(request), &group_manager::leave_group);
     }
 
-    auto offset_commit(offset_commit_request&& request) {
-        return route(std::move(request), &group_manager::offset_commit);
+    group::offset_commit_stages offset_commit(offset_commit_request&& request) {
+        auto m = shard_for(request.data.group_id);
+        if (!m) {
+            return group::offset_commit_stages(
+              offset_commit_response(request, error_code::not_coordinator));
+        }
+        request.ntp = std::move(m->first);
+        auto dispatched = std::make_unique<ss::promise<>>();
+        auto dispatched_f = dispatched->get_future();
+        auto f = with_scheduling_group(
+          _sg,
+          [this,
+           shard = m->second,
+           request = std::move(request),
+           dispatched = std::move(dispatched)]() mutable {
+              return _group_manager.invoke_on(
+                shard,
+                _ssg,
+                [request = std::move(request),
+                 dispatched = std::move(dispatched),
+                 source_shard = ss::this_shard_id()](
+                  group_manager& mgr) mutable {
+                    auto stages = mgr.offset_commit(std::move(request));
+                    /**
+                     * dispatched future is always ready before committed one,
+                     * we do not have to use gate in here
+                     */
+                    return stages.dispatched
+                      .then_wrapped([source_shard, d = std::move(dispatched)](
+                                      ss::future<> f) mutable {
+                          if (f.failed()) {
+                              (void)ss::smp::submit_to(
+                                source_shard,
+                                [d = std::move(d),
+                                 e = f.get_exception()]() mutable {
+                                    d->set_exception(e);
+                                    d.reset();
+                                });
+                              return;
+                          }
+                          (void)ss::smp::submit_to(
+                            source_shard, [d = std::move(d)]() mutable {
+                                d->set_value();
+                                d.reset();
+                            });
+                      })
+                      .then([f = std::move(stages.committed)]() mutable {
+                          return std::move(f);
+                      });
+                });
+          });
+        return group::offset_commit_stages(
+          std::move(dispatched_f), std::move(f));
     }
 
     auto txn_offset_commit(txn_offset_commit_request&& request) {

diff --git a/src/v/kafka/server/handlers/handler.h b/src/v/kafka/server/handlers/handler.h
@@ -41,6 +41,15 @@ concept KafkaApiHandler = requires (T h, request_context&& ctx, ss::smp_service_
     { T::handle(std::move(ctx), g) } -> std::same_as<ss::future<response_ptr>>;
 };
 )
+CONCEPT(
+template<typename T>
+concept KafkaApiTwoPhaseHandler = requires (T h, request_context&& ctx, ss::smp_service_group g) {
+    KafkaApi<typename T::api>;
+    { T::min_supported } -> std::convertible_to<const api_version&>;
+    { T::max_supported } -> std::convertible_to<const api_version&>;
+    { T::handle(std::move(ctx), g) } -> std::same_as<process_result_stages>;
+};
+)
 // clang-format on
 
 } // namespace kafka