Skip to content

Commit

Permalink
mgr/stats: be resilient to offline MDS rank0
Browse files Browse the repository at this point in the history
Fixes: https://tracker.ceph.com/issues/50033
Signed-off-by: Jos Collin <jcollin@redhat.com>
  • Loading branch information
joscollin committed Jul 2, 2021
1 parent 5b766a8 commit 103e6e6
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 0 deletions.
8 changes: 8 additions & 0 deletions src/pybind/mgr/stats/fs/perf_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,14 @@ def notify(self, cmdtag):
self.log.debug("client_metadata={0}, to_purge={1}".format(
self.client_metadata['metadata'], self.client_metadata['to_purge']))

def re_register_queries(self):
    """Re-register every stored user query and refresh its bookkeeping.

    For each filter spec held in ``self.user_queries`` the MDS perf query
    and the global perf query are registered again, the returned ids are
    stored back on the entry, and the entry's last-request timestamp is
    reset to the current time.
    """
    # Walk a snapshot of the keys so the loop is not iterating the live dict.
    # NOTE(review): module.py schedules this method through threading.Timer,
    # so it runs off the notify thread -- confirm that access to
    # user_queries is synchronized elsewhere.
    for spec in list(self.user_queries):
        entry = self.user_queries[spec]
        entry[QUERY_IDS] = self.register_mds_perf_query(spec)
        entry[GLOBAL_QUERY_ID] = self.register_global_perf_query(spec)
        entry[QUERY_LAST_REQUEST] = datetime.now()

def update_client_meta(self, rank_set):
new_updates = {}
pending_updates = [v[0] for v in self.client_metadata['in_progress'].values()]
Expand Down
22 changes: 22 additions & 0 deletions src/pybind/mgr/stats/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import json
import threading
from typing import List, Dict

from mgr_module import MgrModule, Option
Expand All @@ -25,14 +26,35 @@ class Module(MgrModule):
def __init__(self, *args, **kwargs):
    """Set up the perf-stats helper and remember the current rank0 MDS.

    ``prev_mds_name_rank0`` holds the name of the MDS that held rank 0 the
    last time the fs_map was inspected, or ``None`` when no rank0 MDS was
    present at load time.
    """
    super(Module, self).__init__(*args, **kwargs)
    self.fs_perf_stats = FSPerfStats(self)
    try:
        self.prev_mds_name_rank0 = self.get_rank0_mds_name(self.get('fs_map'))
    except RuntimeError as e:
        # get_rank0_mds_name() raises RuntimeError when the fs_map has no
        # rank0 MDS (no filesystem yet, or rank0 offline). That must not
        # prevent the module from loading: start with "unknown" so the
        # first fs_map notification that shows a rank0 MDS is treated as
        # a change.
        self.prev_mds_name_rank0 = None
        self.log.warning(e)

def notify(self, notify_type, notify_id):
    """Dispatch a manager notification.

    * ``"command"``: forwarded to ``fs_perf_stats.notify`` with
      ``notify_id``.
    * ``"fs_map"``: looks up the current rank0 MDS name; when it differs
      from the remembered one, re-registration of the user queries is
      kicked off on a separate timer thread and the remembered name is
      updated.

    Any other notification type is ignored.
    """
    if notify_type == "command":
        self.fs_perf_stats.notify(notify_id)
    elif notify_type == "fs_map":
        try:
            mds_name_rank0 = self.get_rank0_mds_name(self.get('fs_map'))
            if mds_name_rank0 != self.prev_mds_name_rank0:
                # Timer(0, ...) runs the re-registration on its own thread
                # instead of inside this notify call.
                threading.Timer(0, self.fs_perf_stats.re_register_queries).start()
                self.prev_mds_name_rank0 = mds_name_rank0
        except RuntimeError as e:
            # No rank0 MDS right now (or the thread could not be started):
            # keep the previously remembered name and report it through
            # the module's own logger.
            self.log.warning(e)

def handle_command(self, inbuf, cmd):
    """Serve a manager command.

    Only commands whose prefix starts with ``fs perf stats`` are handled;
    they are answered with ``fs_perf_stats.get_perf_data(cmd)``. Anything
    else raises ``NotImplementedError`` carrying the offending prefix.
    """
    command_prefix = cmd['prefix']
    # `fs perf stats` is the only command supported right now.
    if not command_prefix.startswith('fs perf stats'):
        raise NotImplementedError(command_prefix)
    return self.fs_perf_stats.get_perf_data(cmd)

def get_rank0_mds_name(self, fsmap):
    """Return the name of the first MDS found holding rank 0.

    Filesystems are scanned in the order they appear in
    ``fsmap['filesystems']``; entries whose ``mdsmap`` is ``None`` are
    skipped. The first daemon in ``mdsmap['info']`` whose ``rank`` equals 0
    wins.

    Raises:
        RuntimeError: no daemon with rank 0 exists in any filesystem.
    """
    for filesystem in fsmap['filesystems']:
        mdsmap = filesystem['mdsmap']
        if mdsmap is None:
            continue
        # NOTE(review): only `rank` is checked, not the daemon state --
        # confirm a standby-replay daemon cannot match here.
        for daemon in mdsmap['info'].values():
            if daemon['rank'] == 0:
                return daemon['name']
    raise RuntimeError("Failed to find a rank0 mds in fsmap")

0 comments on commit 103e6e6

Please sign in to comment.