diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 2244437e45289..4da1fc8f0b38f 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -9724,6 +9724,12 @@ void MDCache::request_cleanup(MDRequestRef& mdr) // remove from map active_requests.erase(mdr->reqid); + // queue next replay op? + if (mdr->is_queued_for_replay() && !mdr->get_queued_next_replay_op()) { + mdr->set_queued_next_replay_op(); + mds->queue_one_replay(); + } + if (mds->logger) log_stat(); diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 43e2d6523093f..c5eaf9873ec8d 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -2055,6 +2055,7 @@ bool MDSRank::queue_one_replay() if (!replay_queue.empty()) { queue_waiter(replay_queue.front()); replay_queue.pop_front(); + dout(10) << " queued next replay op" << dendl; return true; } if (!replaying_requests_done) { @@ -2062,6 +2063,7 @@ bool MDSRank::queue_one_replay() mdlog->flush(); } maybe_clientreplay_done(); + dout(10) << " journaled last replay op" << dendl; return false; } diff --git a/src/mds/Mutation.h b/src/mds/Mutation.h index 6d4073aafb3e1..16b0700e151ba 100644 --- a/src/mds/Mutation.h +++ b/src/mds/Mutation.h @@ -387,6 +387,12 @@ struct MDRequestImpl : public MutationImpl { void set_filepath(const filepath& fp); void set_filepath2(const filepath& fp); bool is_queued_for_replay() const; + bool get_queued_next_replay_op() const { + return queued_next_replay_op; + } + void set_queued_next_replay_op() { + queued_next_replay_op = true; + } int compare_paths(); bool can_batch(); @@ -456,6 +462,7 @@ struct MDRequestImpl : public MutationImpl { void _dump_op_descriptor_unlocked(std::ostream& stream) const override; private: mutable ceph::spinlock msg_lock; + bool queued_next_replay_op = false; }; struct MDPeerUpdate { diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 554106e29c042..e6fe0f8c9a1c1 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -299,6 +299,7 @@ void Server::dispatch(const cref_t &m) return; } bool queue_replay = false; + dout(5) << "dispatch request in up:reconnect: " << *req << dendl; if (req->is_replay() || req->is_async()) { dout(3) << "queuing replayed op" << dendl; queue_replay = true; @@ -317,10 +318,13 @@ void Server::dispatch(const cref_t &m) // process completed request in clientreplay stage. The completed request // might have created new file/directorie. This guarantees MDS sends a reply // to client before other request modifies the new file/directorie. - if (session->have_completed_request(req->get_reqid().tid, NULL)) { - dout(3) << "queuing completed op" << dendl; + bool r = session->have_completed_request(req->get_reqid().tid, NULL); + if (r) { + dout(3) << __func__ << ": queuing completed op" << dendl; queue_replay = true; - } + } else { + dout(20) << __func__ << ": request not complete" << dendl; + } // this request was created before the cap reconnect message, drop any embedded // cap releases. req->releases.clear(); @@ -1975,13 +1979,16 @@ void Server::journal_and_reply(MDRequestRef& mdr, CInode *in, CDentry *dn, LogEv mdr->committing = true; submit_mdlog_entry(le, fin, mdr, __func__); - - if (mdr->client_request && mdr->client_request->is_queued_for_replay()) { - if (mds->queue_one_replay()) { - dout(10) << " queued next replay op" << dendl; - } else { - dout(10) << " journaled last replay op" << dendl; - } + + if (mdr->is_queued_for_replay()) { + + /* We want to queue the next replay op while waiting for the journaling, so + * do it now when the early (unsafe) replay is dispatched. Don't wait until + * this request is cleaned up in MDCache.cc. + */ + + mdr->set_queued_next_replay_op(); + mds->queue_one_replay(); } else if (mdr->did_early_reply) mds->locker->drop_rdlocks_for_early_reply(mdr.get()); else @@ -2282,15 +2289,12 @@ void Server::reply_client_request(MDRequestRef& mdr, const ref_t & mds->send_message_client(reply, session); } - if (req->is_queued_for_replay() && - (mdr->has_completed || reply->get_result() < 0)) { - if (reply->get_result() < 0) { - int r = reply->get_result(); + if (req->is_queued_for_replay()) { + if (int r = reply->get_result(); r < 0) { derr << "reply_client_request: failed to replay " << *req - << " error " << r << " (" << cpp_strerror(r) << ")" << dendl; + << " error " << r << " (" << cpp_strerror(r) << ")" << dendl; mds->clog->warn() << "failed to replay " << req->get_reqid() << " error " << r; } - mds->queue_one_replay(); } // clean up request @@ -2488,8 +2492,12 @@ void Server::handle_client_request(const cref_t &req) // register + dispatch MDRequestRef mdr = mdcache->request_start(req); - if (!mdr.get()) + if (!mdr.get()) { + dout(5) << __func__ << ": possibly duplicate op " << *req << dendl; + if (req->is_queued_for_replay()) + mds->queue_one_replay(); return; + } if (session) { mdr->session = session;