Skip to content

Commit

Permalink
Merge pull request ceph#30689 from smithfarm/wip-42120-nautilus
Browse files Browse the repository at this point in the history
nautilus: core: osd/OSDMap: health alert for non-power-of-two pg_num

Reviewed-by: Neha Ojha <nojha@redhat.com>
  • Loading branch information
yuriw committed Feb 8, 2020
2 parents 5b95c39 + fe2035f commit e28dea6
Show file tree
Hide file tree
Showing 8 changed files with 67 additions and 13 deletions.
12 changes: 11 additions & 1 deletion PendingReleaseNotes
@@ -1,4 +1,4 @@
14.2.7
14.2.8
------

* The following OSD memory config options related to bluestore cache autotuning can now
Expand Down Expand Up @@ -33,3 +33,13 @@
named 'Records'. Note that this does not affect pulling bucket notification
from a subscription in a 'pubsub' zone, as these are already wrapped inside
that array.

* Ceph will now issue a health warning if a RADOS pool as a ``pg_num``
value that is not a power of two. This can be fixed by adjusting
the pool to a nearby power of two::

ceph osd pool set <pool-name> pg_num <new-pg-num>

Alternatively, the warning can be silenced with::

ceph config set global mon_warn_on_pool_pg_num_not_power_of_two false
17 changes: 17 additions & 0 deletions doc/rados/operations/health-checks.rst
Expand Up @@ -636,6 +636,23 @@ The PG count for existing pools can be increased or new pools can be created.
Please refer to :ref:`choosing-number-of-placement-groups` for more
information.

POOL_PG_NUM_NOT_POWER_OF_TWO
____________________________

One or more pools has a ``pg_num`` value that is not a power of two.
Although this is not strictly incorrect, it does lead to a less
balanced distribution of data because some PGs have roughly twice as
much data as others.

This is easily corrected by setting the ``pg_num`` value for the
affected pool(s) to a nearby power of two::

ceph osd pool set <pool-name> pg_num <value>

This health warning can be disabled with::

ceph config set global mon_warn_on_pool_pg_num_not_power_of_two false

POOL_TOO_FEW_PGS
________________

Expand Down
1 change: 1 addition & 0 deletions qa/tasks/ceph.conf.template
Expand Up @@ -26,6 +26,7 @@
mon warn on no sortbitwise = false
mon warn on osd down out interval zero = false
mon warn on too few osds = false
mon_warn_on_pool_pg_num_not_power_of_two = false

osd pool default erasure code profile = "plugin=jerasure technique=reed_sol_van k=2 m=1 ruleset-failure-domain=osd crush-failure-domain=osd"

Expand Down
5 changes: 5 additions & 0 deletions src/common/options.cc
Expand Up @@ -1735,6 +1735,11 @@ std::vector<Option> get_global_options() {
.add_service("mgr")
.set_description("issue POOL_APP_NOT_ENABLED health warning if pool has not application enabled"),

Option("mon_warn_on_pool_pg_num_not_power_of_two", Option::TYPE_BOOL, Option::LEVEL_DEV)
.set_default(true)
.add_service("mon")
.set_description("issue POOL_PG_NUM_NOT_POWER_OF_TWO warning if pool has a non-power-of-two pg_num value"),

Option("mon_warn_on_misplaced", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(false)
.add_service("mgr")
Expand Down
2 changes: 1 addition & 1 deletion src/mon/OSDMonitor.cc
Expand Up @@ -1870,7 +1870,7 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t)

// health
health_check_map_t next;
tmp.check_health(&next);
tmp.check_health(cct, &next);
encode_health(next, t);
}

Expand Down
39 changes: 30 additions & 9 deletions src/osd/OSDMap.cc
Expand Up @@ -26,7 +26,6 @@
#include "common/errno.h"
#include "common/Formatter.h"
#include "common/TextTable.h"
#include "global/global_context.h"
#include "include/ceph_features.h"
#include "include/str_map.h"

Expand Down Expand Up @@ -5417,7 +5416,8 @@ void print_osd_utilization(const OSDMap& osdmap,
}
}

void OSDMap::check_health(health_check_map_t *checks) const
void OSDMap::check_health(CephContext *cct,
health_check_map_t *checks) const
{
int num_osds = get_num_osds();

Expand Down Expand Up @@ -5462,7 +5462,7 @@ void OSDMap::check_health(health_check_map_t *checks) const
break;
type = crush->get_bucket_type(parent_id);
if (!subtree_type_is_down(
g_ceph_context, parent_id, type,
cct, parent_id, type,
&down_in_osds, &up_in_osds, &subtree_up, &subtree_type_down))
break;
current = parent_id;
Expand Down Expand Up @@ -5591,7 +5591,7 @@ void OSDMap::check_health(health_check_map_t *checks) const
{
// An osd could configure failsafe ratio, to something different
// but for now assume it is the same here.
float fsr = g_conf()->osd_failsafe_full_ratio;
float fsr = cct->_conf->osd_failsafe_full_ratio;
if (fsr > 1.0) fsr /= 100;
float fr = get_full_ratio();
float br = get_backfillfull_ratio();
Expand Down Expand Up @@ -5742,19 +5742,19 @@ void OSDMap::check_health(health_check_map_t *checks) const
}

// OLD_CRUSH_TUNABLES
if (g_conf()->mon_warn_on_legacy_crush_tunables) {
if (cct->_conf->mon_warn_on_legacy_crush_tunables) {
string min = crush->get_min_required_version();
if (min < g_conf()->mon_crush_min_required_version) {
if (min < cct->_conf->mon_crush_min_required_version) {
ostringstream ss;
ss << "crush map has legacy tunables (require " << min
<< ", min is " << g_conf()->mon_crush_min_required_version << ")";
<< ", min is " << cct->_conf->mon_crush_min_required_version << ")";
auto& d = checks->add("OLD_CRUSH_TUNABLES", HEALTH_WARN, ss.str());
d.detail.push_back("see http://docs.ceph.com/docs/master/rados/operations/crush-map/#tunables");
}
}

// OLD_CRUSH_STRAW_CALC_VERSION
if (g_conf()->mon_warn_on_crush_straw_calc_version_zero) {
if (cct->_conf->mon_warn_on_crush_straw_calc_version_zero) {
if (crush->get_straw_calc_version() == 0) {
ostringstream ss;
ss << "crush map has straw_calc_version=0";
Expand All @@ -5765,7 +5765,7 @@ void OSDMap::check_health(health_check_map_t *checks) const
}

// CACHE_POOL_NO_HIT_SET
if (g_conf()->mon_warn_on_cache_pools_without_hit_sets) {
if (cct->_conf->mon_warn_on_cache_pools_without_hit_sets) {
list<string> detail;
for (map<int64_t, pg_pool_t>::const_iterator p = pools.begin();
p != pools.end();
Expand Down Expand Up @@ -5843,6 +5843,27 @@ void OSDMap::check_health(health_check_map_t *checks) const
d.detail.swap(nearfull_detail);
}
}

// POOL_PG_NUM_NOT_POWER_OF_TWO
if (cct->_conf.get_val<bool>("mon_warn_on_pool_pg_num_not_power_of_two")) {
list<string> detail;
for (auto it : get_pools()) {
if (!isp2(it.second.get_pg_num_target())) {
ostringstream ss;
ss << "pool '" << get_pool_name(it.first)
<< "' pg_num " << it.second.get_pg_num_target()
<< " is not a power of two";
detail.push_back(ss.str());
}
}
if (!detail.empty()) {
ostringstream ss;
ss << detail.size() << " pool(s) have non-power-of-two pg_num";
auto& d = checks->add("POOL_PG_NUM_NOT_POWER_OF_TWO", HEALTH_WARN,
ss.str());
d.detail.swap(detail);
}
}
}

int OSDMap::parse_osd_id_list(const vector<string>& ls, set<int> *out,
Expand Down
2 changes: 1 addition & 1 deletion src/osd/OSDMap.h
Expand Up @@ -1499,7 +1499,7 @@ class OSDMap {
static void generate_test_instances(list<OSDMap*>& o);
bool check_new_blacklist_entries() const { return new_blacklist_entries; }

void check_health(health_check_map_t *checks) const;
void check_health(CephContext *cct, health_check_map_t *checks) const;

int parse_osd_id_list(const vector<string>& ls,
set<int> *out,
Expand Down
2 changes: 1 addition & 1 deletion src/tools/osdmaptool.cc
Expand Up @@ -739,7 +739,7 @@ int main(int argc, const char **argv)

if (health) {
health_check_map_t checks;
osdmap.check_health(&checks);
osdmap.check_health(cct.get(), &checks);
JSONFormatter jf(true);
jf.dump_object("checks", checks);
jf.flush(cout);
Expand Down

0 comments on commit e28dea6

Please sign in to comment.