diff --git a/src/client/Client.cc b/src/client/Client.cc index 3ebc3413903d5..2c4ec30342f87 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -2477,7 +2477,7 @@ void Client::send_request(MetaRequest *request, MetaSession *session, mds_rank_t mds = session->mds_num; ldout(cct, 10) << __func__ << " rebuilding request " << request->get_tid() << " for mds." << mds << dendl; - auto r = build_client_request(request); + auto r = build_client_request(request, mds); if (!r) return; @@ -2522,8 +2522,11 @@ void Client::send_request(MetaRequest *request, MetaSession *session, session->con->send_message2(std::move(r)); } -ref_t Client::build_client_request(MetaRequest *request) +ref_t Client::build_client_request(MetaRequest *request, mds_rank_t mds) { + auto session = mds_sessions.at(mds); + bool old_version = !session->mds_features.test(CEPHFS_FEATURE_32BITS_RETRY_FWD); + /* * The type of 'retry_attempt' in 'MetaRequest' is 'int', * while in 'ceph_mds_request_head' the type of 'num_retry' @@ -2546,7 +2549,7 @@ ref_t Client::build_client_request(MetaRequest *request) return nullptr; } - auto req = make_message(request->get_op()); + auto req = make_message(request->get_op(), old_version); req->set_tid(request->tid); req->set_stamp(request->op_stamp); memcpy(&req->head, &request->head, sizeof(ceph_mds_request_head)); diff --git a/src/client/Client.h b/src/client/Client.h index 6c1132f75883e..73a603397afde 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -956,7 +956,7 @@ class Client : public Dispatcher, public md_config_obs_t { void connect_mds_targets(mds_rank_t mds); void send_request(MetaRequest *request, MetaSession *session, bool drop_cap_releases=false); - MRef build_client_request(MetaRequest *request); + MRef build_client_request(MetaRequest *request, mds_rank_t mds); void kick_requests(MetaSession *session); void kick_requests_closed(MetaSession *session); void handle_client_request_forward(const MConstRef& reply); diff --git a/src/client/MetaRequest.cc b/src/client/MetaRequest.cc index 2450a1f681c9c..3994424e79360 100644 --- a/src/client/MetaRequest.cc +++ b/src/client/MetaRequest.cc @@ -46,8 +46,8 @@ void MetaRequest::dump(Formatter *f) const f->dump_unsigned("oldest_client_tid", head.oldest_client_tid); f->dump_unsigned("mdsmap_epoch", head.mdsmap_epoch); f->dump_unsigned("flags", head.flags); - f->dump_unsigned("num_retry", head.num_retry); - f->dump_unsigned("num_fwd", head.num_fwd); + f->dump_unsigned("num_retry", head.ext_num_retry); + f->dump_unsigned("num_fwd", head.ext_num_fwd); f->dump_unsigned("num_releases", head.num_releases); f->dump_int("abort_rc", abort_rc); diff --git a/src/client/MetaRequest.h b/src/client/MetaRequest.h index 86722902a6d68..ea39306608254 100644 --- a/src/client/MetaRequest.h +++ b/src/client/MetaRequest.h @@ -156,8 +156,8 @@ struct MetaRequest { // normal fields void set_tid(ceph_tid_t t) { tid = t; } void set_oldest_client_tid(ceph_tid_t t) { head.oldest_client_tid = t; } - void inc_num_fwd() { head.num_fwd = head.num_fwd + 1; } - void set_retry_attempt(int a) { head.num_retry = a; } + void inc_num_fwd() { head.ext_num_fwd = head.ext_num_fwd + 1; } + void set_retry_attempt(int a) { head.ext_num_retry = a; } void set_filepath(const filepath& fp) { path = fp; } void set_filepath2(const filepath& fp) { path2 = fp; } void set_alternate_name(std::string an) { alternate_name = an; } diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index f2d8a2d242e83..124eba6320c6a 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -621,7 +621,7 @@ union ceph_mds_request_args { } __attribute__ ((packed)) lookupino; } __attribute__ ((packed)); -#define CEPH_MDS_REQUEST_HEAD_VERSION 1 +#define CEPH_MDS_REQUEST_HEAD_VERSION 2 /* * Note that any change to this structure must ensure that it is compatible @@ -632,29 +632,42 @@ struct ceph_mds_request_head { __le64 oldest_client_tid; __le32 mdsmap_epoch; /* on client */ __le32 flags; /* CEPH_MDS_FLAG_* */ - __u8 num_retry, num_fwd; /* count retry, fwd attempts */ + __u8 num_retry, num_fwd; /* legacy count retry and fwd attempts */ __le16 num_releases; /* # include cap/lease release records */ __le32 op; /* mds op code */ __le32 caller_uid, caller_gid; __le64 ino; /* use this ino for openc, mkdir, mknod, etc. (if replaying) */ union ceph_mds_request_args args; + + __le32 ext_num_retry; /* new count retry attempts */ + __le32 ext_num_fwd; /* new count fwd attempts */ } __attribute__ ((packed)); -void inline encode(const struct ceph_mds_request_head& h, ceph::buffer::list& bl) { +void inline encode(const struct ceph_mds_request_head& h, ceph::buffer::list& bl, bool old_version) { using ceph::encode; encode(h.version, bl); encode(h.oldest_client_tid, bl); encode(h.mdsmap_epoch, bl); encode(h.flags, bl); - encode(h.num_retry, bl); - encode(h.num_fwd, bl); + + // For old MDS daemons + __u8 num_retry = __u32(h.ext_num_retry); + __u8 num_fwd = __u32(h.ext_num_fwd); + encode(num_retry, bl); + encode(num_fwd, bl); + encode(h.num_releases, bl); encode(h.op, bl); encode(h.caller_uid, bl); encode(h.caller_gid, bl); encode(h.ino, bl); bl.append((char*)&h.args, sizeof(h.args)); + + if (!old_version) { + encode(h.ext_num_retry, bl); + encode(h.ext_num_fwd, bl); + } } void inline decode(struct ceph_mds_request_head& h, ceph::buffer::list::const_iterator& bl) { @@ -671,6 +684,14 @@ void inline decode(struct ceph_mds_request_head& h, ceph::buffer::list::const_it decode(h.caller_gid, bl); decode(h.ino, bl); bl.copy(sizeof(h.args), (char*)&(h.args)); + + if (h.version >= 2) { + decode(h.ext_num_retry, bl); + decode(h.ext_num_fwd, bl); + } else { + h.ext_num_retry = h.num_retry; + h.ext_num_fwd = h.num_fwd; + } } /* cap/lease release record */ diff --git a/src/mds/cephfs_features.cc b/src/mds/cephfs_features.cc index 61c442699a5d2..6102a9b1652c2 100644 --- a/src/mds/cephfs_features.cc +++ b/src/mds/cephfs_features.cc @@ -29,6 +29,7 @@ static const std::array feature_names "alternate_name", "notify_session_state", "op_getvxattr", + "32bits_retry_fwd", }; static_assert(feature_names.size() == CEPHFS_FEATURE_MAX + 1); diff --git a/src/mds/cephfs_features.h b/src/mds/cephfs_features.h index c73ac1e8397a4..cd6fde1a57435 100644 --- a/src/mds/cephfs_features.h +++ b/src/mds/cephfs_features.h @@ -45,7 +45,8 @@ namespace ceph { #define CEPHFS_FEATURE_ALTERNATE_NAME 15 #define CEPHFS_FEATURE_NOTIFY_SESSION_STATE 16 #define CEPHFS_FEATURE_OP_GETVXATTR 17 -#define CEPHFS_FEATURE_MAX 17 +#define CEPHFS_FEATURE_32BITS_RETRY_FWD 18 +#define CEPHFS_FEATURE_MAX 18 #define CEPHFS_FEATURES_ALL { \ 0, 1, 2, 3, 4, \ @@ -64,6 +65,7 @@ namespace ceph { CEPHFS_FEATURE_ALTERNATE_NAME, \ CEPHFS_FEATURE_NOTIFY_SESSION_STATE, \ CEPHFS_FEATURE_OP_GETVXATTR, \ + CEPHFS_FEATURE_32BITS_RETRY_FWD, \ } #define CEPHFS_METRIC_FEATURES_ALL { \ diff --git a/src/messages/MClientRequest.h b/src/messages/MClientRequest.h index 5a3f2ce74c8ce..4835be2edff7d 100644 --- a/src/messages/MClientRequest.h +++ b/src/messages/MClientRequest.h @@ -73,6 +73,7 @@ class MClientRequest final : public MMDSOp { public: mutable struct ceph_mds_request_head head; /* XXX HACK! */ utime_t stamp; + bool peer_old_version = false; struct Release { mutable ceph_mds_request_release item; @@ -111,10 +112,11 @@ class MClientRequest final : public MMDSOp { // cons MClientRequest() : MMDSOp(CEPH_MSG_CLIENT_REQUEST, HEAD_VERSION, COMPAT_VERSION) {} - MClientRequest(int op) + MClientRequest(int op, bool over=true) : MMDSOp(CEPH_MSG_CLIENT_REQUEST, HEAD_VERSION, COMPAT_VERSION) { memset(&head, 0, sizeof(head)); head.op = op; + peer_old_version = over; } ~MClientRequest() final {} @@ -160,8 +162,8 @@ class MClientRequest final : public MMDSOp { // normal fields void set_stamp(utime_t t) { stamp = t; } void set_oldest_client_tid(ceph_tid_t t) { head.oldest_client_tid = t; } - void inc_num_fwd() { head.num_fwd = head.num_fwd + 1; } - void set_retry_attempt(int a) { head.num_retry = a; } + void inc_num_fwd() { head.ext_num_fwd = head.ext_num_fwd + 1; } + void set_retry_attempt(int a) { head.ext_num_retry = a; } void set_filepath(const filepath& fp) { path = fp; } void set_filepath2(const filepath& fp) { path2 = fp; } void set_string2(const char *s) { path2.set_path(std::string_view(s), 0); } @@ -192,8 +194,8 @@ class MClientRequest final : public MMDSOp { utime_t get_stamp() const { return stamp; } ceph_tid_t get_oldest_client_tid() const { return head.oldest_client_tid; } - int get_num_fwd() const { return head.num_fwd; } - int get_retry_attempt() const { return head.num_retry; } + int get_num_fwd() const { return head.ext_num_fwd; } + int get_retry_attempt() const { return head.ext_num_retry; } int get_op() const { return head.op; } unsigned get_caller_uid() const { return head.caller_uid; } unsigned get_caller_gid() const { return head.caller_gid; } @@ -252,10 +254,20 @@ class MClientRequest final : public MMDSOp { void encode_payload(uint64_t features) override { using ceph::encode; head.num_releases = releases.size(); - head.version = CEPH_MDS_REQUEST_HEAD_VERSION; + /* + * If the peer is old version, we must skip all the + * new members, because the old version of MDS or + * client will just copy the 'head' memory and isn't + * that smart to skip them. + */ + if (peer_old_version) { + head.version = 1; + } else { + head.version = CEPH_MDS_REQUEST_HEAD_VERSION; + } if (features & CEPH_FEATURE_FS_BTIME) { - encode(head, payload); + encode(head, payload, peer_old_version); } else { struct ceph_mds_request_head_legacy old_mds_head; @@ -312,8 +324,10 @@ class MClientRequest final : public MMDSOp { out << " " << get_filepath2(); if (stamp != utime_t()) out << " " << stamp; - if (head.num_retry) - out << " RETRY=" << (int)head.num_retry; + if (head.ext_num_fwd) + out << " FWD=" << (int)head.ext_num_fwd; + if (head.ext_num_retry) + out << " RETRY=" << (int)head.ext_num_retry; if (is_async()) out << " ASYNC"; if (is_replay())