diff --git a/jstests/noPassthrough/sharded_index_consistency_metrics.js b/jstests/noPassthrough/sharded_index_consistency_metrics.js new file mode 100644 index 0000000000000..5dc53e0a3782d --- /dev/null +++ b/jstests/noPassthrough/sharded_index_consistency_metrics.js @@ -0,0 +1,139 @@ +/* + * Tests index consistency metrics in the serverStatus output. + * @tags: [requires_fcv_44, requires_sharding] + */ +(function() { +"use strict"; + +// This test creates inconsistent indexes. +TestData.skipCheckingIndexesConsistentAcrossCluster = true; + +/* + * Asserts that the serverStatus output does not contain the index consistency metrics + * both by default and when 'shardedIndexConsistency' is explicitly included. + */ +function assertServerStatusNotContainIndexMetrics(conn) { + let res = assert.commandWorked(conn.adminCommand({serverStatus: 1})); + assert.eq(undefined, res.shardedIndexConsistency, tojson(res.shardedIndexConsistency)); + + res = assert.commandWorked(conn.adminCommand({serverStatus: 1, shardedIndexConsistency: 1})); + assert.eq(undefined, res.shardedIndexConsistency, tojson(res.shardedIndexConsistency)); +} + +/* + * Asserts that eventually the number of sharded collections with inconsistent indexes in the + * serverStatus output is equal to the expected count. 
+ */ +function checkServerStatusNumCollsWithInconsistentIndexes(conn, expectedCount) { + assert.soon( + () => { + const res = assert.commandWorked(conn.adminCommand({serverStatus: 1})); + assert.hasFields(res, ["shardedIndexConsistency"]); + assert.hasFields(res.shardedIndexConsistency, + ["numShardedCollectionsWithInconsistentIndexes"]); + return expectedCount == + res.shardedIndexConsistency.numShardedCollectionsWithInconsistentIndexes; + }, + `expect the count of sharded collections with inconsistent indexes to eventually be equal to ${ + expectedCount}`, + undefined /* timeout */, + 1000 /* interval */); +} + +/* + * For each mongod in 'connsWithIndexConsistencyMetrics', asserts that its serverStatus + * output has the expected number of collections with inconsistent indexes. For each mongod + * in 'connsWithoutIndexConsistencyMetrics', asserts that its serverStatus output does + * not contain the index consistency metrics. + */ +function checkServerStatus(connsWithIndexConsistencyMetrics, + connsWithoutIndexConsistencyMetrics, + expectedNumCollsWithInconsistentIndexes) { + for (const conn of connsWithIndexConsistencyMetrics) { + checkServerStatusNumCollsWithInconsistentIndexes(conn, + expectedNumCollsWithInconsistentIndexes); + } + for (const conn of connsWithoutIndexConsistencyMetrics) { + assertServerStatusNotContainIndexMetrics(conn); + } +} + +const intervalMS = 3000; +const st = new ShardingTest({ + shards: 2, + config: 2, + configOptions: {setParameter: {"shardedIndexConsistencyCheckIntervalMS": intervalMS}} +}); +const dbName = "testDb"; +const ns1 = dbName + ".testColl1"; +const ns2 = dbName + ".testColl2"; +const ns3 = dbName + ".testColl3"; +const expiration = 1000000; +const filterExpr = { + x: {$gt: 50} +}; + +assert.commandWorked(st.s.adminCommand({enableSharding: dbName})); +st.ensurePrimaryShard(dbName, st.shard0.shardName); +assert.commandWorked(st.s.adminCommand({shardCollection: ns1, key: {_id: "hashed"}})); 
+assert.commandWorked(st.s.adminCommand({shardCollection: ns2, key: {_id: "hashed"}})); +assert.commandWorked(st.s.adminCommand({shardCollection: ns3, key: {_id: "hashed"}})); + +st.config1.getDB("admin").runCommand({setParameter: 1, enableShardedIndexConsistencyCheck: false}); +const connsWithIndexConsistencyMetrics = [st.config0]; +const connsWithoutIndexConsistencyMetrics = [st.config1, st.shard0, st.shard1, st.s]; + +checkServerStatus(connsWithIndexConsistencyMetrics, connsWithoutIndexConsistencyMetrics, 0); + +// Create an inconsistent index for ns1. +assert.commandWorked(st.shard0.getCollection(ns1).createIndex({x: 1})); +checkServerStatus(connsWithIndexConsistencyMetrics, connsWithoutIndexConsistencyMetrics, 1); + +// Create another inconsistent index for ns1. +assert.commandWorked(st.shard1.getCollection(ns1).createIndexes([{y: 1}])); +checkServerStatus(connsWithIndexConsistencyMetrics, connsWithoutIndexConsistencyMetrics, 1); + +// Create an inconsistent index for ns2. +assert.commandWorked(st.shard0.getCollection(ns2).createIndex({x: 1})); +checkServerStatus(connsWithIndexConsistencyMetrics, connsWithoutIndexConsistencyMetrics, 2); + +// Resolve the index inconsistency for ns2. +assert.commandWorked(st.shard1.getCollection(ns2).createIndex({x: 1})); +checkServerStatus(connsWithIndexConsistencyMetrics, connsWithoutIndexConsistencyMetrics, 1); + +// Create indexes for n3 with the same options but in different orders on each shard, and verify +// that it is not considered as inconsistent. 
+assert.commandWorked(st.shard0.getCollection(ns3).createIndex({x: 1}, { + name: "indexWithOptionsOrderedDifferently", + partialFilterExpression: filterExpr, + expireAfterSeconds: expiration +})); +assert.commandWorked(st.shard1.getCollection(ns3).createIndex({x: 1}, { + name: "indexWithOptionsOrderedDifferently", + expireAfterSeconds: expiration, + partialFilterExpression: filterExpr +})); +checkServerStatus(connsWithIndexConsistencyMetrics, connsWithoutIndexConsistencyMetrics, 1); + +// Create indexes for ns3 with the same key but different options on each shard, and verify that +// it is considered as inconsistent. +assert.commandWorked(st.shard0.getCollection(ns3).createIndex( + {y: 1}, {name: "indexWithDifferentOptions", expireAfterSeconds: expiration})); +assert.commandWorked( + st.shard1.getCollection(ns3).createIndex({y: 1}, {name: "indexWithDifferentOptions"})); +checkServerStatus(connsWithIndexConsistencyMetrics, connsWithoutIndexConsistencyMetrics, 2); + +st.stop(); + +// Verify that the serverStatus output for standalones and non-sharded replica set servers does +// not contain the index consistency metrics. 
+const standaloneMongod = MongoRunner.runMongod(); +assertServerStatusNotContainIndexMetrics(standaloneMongod); +MongoRunner.stopMongod(standaloneMongod); + +const rst = new ReplSetTest({nodes: 1}); +rst.startSet(); +rst.initiate(); +assertServerStatusNotContainIndexMetrics(rst.getPrimary()); +rst.stopSet(); +}()); diff --git a/src/mongo/db/commands/SConscript b/src/mongo/db/commands/SConscript index 7e90c43ec118f..568e23c6272b8 100644 --- a/src/mongo/db/commands/SConscript +++ b/src/mongo/db/commands/SConscript @@ -202,7 +202,7 @@ env.Library( "fsync.cpp", ], LIBDEPS_PRIVATE=[ - '$BUILD_DIR/mongo/db/auth/authprivilege', + '$BUILD_DIR/mongo/db/auth/authprivilege', '$BUILD_DIR/mongo/db/commands', '$BUILD_DIR/mongo/db/concurrency/write_conflict_exception', '$BUILD_DIR/mongo/db/curop', @@ -374,6 +374,7 @@ env.Library( 'rwc_defaults_commands.cpp', "set_feature_compatibility_version_command.cpp", "set_index_commit_quorum_command.cpp", + "sharded_index_consistency_server_status.cpp", "shutdown_d.cpp", "snapshot_management.cpp", "top_command.cpp", diff --git a/src/mongo/db/commands/sharded_index_consistency_server_status.cpp b/src/mongo/db/commands/sharded_index_consistency_server_status.cpp new file mode 100644 index 0000000000000..876427b152543 --- /dev/null +++ b/src/mongo/db/commands/sharded_index_consistency_server_status.cpp @@ -0,0 +1,68 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/commands/server_status.h" +#include "mongo/db/s/periodic_sharded_index_consistency_checker.h" +#include "mongo/db/s/sharding_runtime_d_params_gen.h" + +namespace mongo { +namespace { + +bool isConfigServerWithShardedIndexConsistencyCheckEnabled() { + return serverGlobalParams.clusterRole == ClusterRole::ConfigServer && + enableShardedIndexConsistencyCheck.load(); +} + +class ShardedIndexConsistencyServerStatus final : public ServerStatusSection { +public: + ShardedIndexConsistencyServerStatus() : ServerStatusSection("shardedIndexConsistency") {} + + bool includeByDefault() const override { + return isConfigServerWithShardedIndexConsistencyCheckEnabled(); + } + + BSONObj generateSection(OperationContext* opCtx, + const BSONElement& configElement) const override { + if (!isConfigServerWithShardedIndexConsistencyCheckEnabled()) { + return {}; + } + + BSONObjBuilder builder; + builder.append("numShardedCollectionsWithInconsistentIndexes", + PeriodicShardedIndexConsistencyChecker::get(opCtx->getServiceContext()) + .getNumShardedCollsWithInconsistentIndexes()); + return builder.obj(); + } + +} indexConsistencyServerStatus; + +} 
// namespace +} // namespace mongo diff --git a/src/mongo/db/db.cpp b/src/mongo/db/db.cpp index 14bed1ae03536..3fb1a95eabeb3 100644 --- a/src/mongo/db/db.cpp +++ b/src/mongo/db/db.cpp @@ -121,6 +121,7 @@ #include "mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/db/s/config_server_op_observer.h" #include "mongo/db/s/op_observer_sharding_impl.h" +#include "mongo/db/s/periodic_sharded_index_consistency_checker.h" #include "mongo/db/s/shard_server_op_observer.h" #include "mongo/db/s/sharding_initialization_mongod.h" #include "mongo/db/s/sharding_state_recovery.h" @@ -965,6 +966,11 @@ void shutdownTask(const ShutdownTaskArgs& shutdownArgs) { lsc->joinOnShutDown(); } + // Terminate the index consistency check. + if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) { + PeriodicShardedIndexConsistencyChecker::get(serviceContext).onShutDown(); + } + // Shutdown the TransportLayer so that new connections aren't accepted if (auto tl = serviceContext->getTransportLayer()) { log(LogComponent::kNetwork) << "shutdown: going to close listening sockets..."; diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp index 38ffb39707679..aa22b6b818eee 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp @@ -81,6 +81,7 @@ #include "mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/db/s/migration_util.h" #include "mongo/db/s/periodic_balancer_config_refresher.h" +#include "mongo/db/s/periodic_sharded_index_consistency_checker.h" #include "mongo/db/s/sharding_initialization_mongod.h" #include "mongo/db/s/sharding_state_recovery.h" #include "mongo/db/s/transaction_coordinator_service.h" @@ -708,6 +709,7 @@ void ReplicationCoordinatorExternalStateImpl::closeConnections() { void ReplicationCoordinatorExternalStateImpl::shardingOnStepDownHook() { if 
(serverGlobalParams.clusterRole == ClusterRole::ConfigServer) { Balancer::get(_service)->interruptBalancer(); + PeriodicShardedIndexConsistencyChecker::get(_service).onStepDown(); TransactionCoordinatorService::get(_service)->onStepDown(); } else if (ShardingState::get(_service)->enabled()) { ChunkSplitter::get(_service).onStepDown(); @@ -795,6 +797,7 @@ void ReplicationCoordinatorExternalStateImpl::_shardingOnTransitionToPrimaryHook validator->enableKeyGenerator(opCtx, true); } + PeriodicShardedIndexConsistencyChecker::get(_service).onStepUp(_service); TransactionCoordinatorService::get(_service)->onStepUp(opCtx); } else if (ShardingState::get(opCtx)->enabled()) { Status status = ShardingStateRecovery::recover(opCtx); diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript index 029f80d545156..22d1ca9e67753 100644 --- a/src/mongo/db/s/SConscript +++ b/src/mongo/db/s/SConscript @@ -55,6 +55,7 @@ env.Library( 'move_timing_helper.cpp', 'namespace_metadata_change_notifications.cpp', 'periodic_balancer_config_refresher.cpp', + 'periodic_sharded_index_consistency_checker.cpp', 'range_deletion_util.cpp', 'read_only_catalog_cache_loader.cpp', 'scoped_operation_completion_sharding_actions.cpp', @@ -91,6 +92,7 @@ env.Library( '$BUILD_DIR/mongo/db/storage/remove_saver', '$BUILD_DIR/mongo/db/transaction', '$BUILD_DIR/mongo/s/client/shard_local', + '$BUILD_DIR/mongo/s/query/cluster_aggregate', '$BUILD_DIR/mongo/s/sharding_initialization', 'chunk_splitter', 'sharding_api_d', diff --git a/src/mongo/db/s/periodic_sharded_index_consistency_checker.cpp b/src/mongo/db/s/periodic_sharded_index_consistency_checker.cpp new file mode 100644 index 0000000000000..387d2c72652e7 --- /dev/null +++ b/src/mongo/db/s/periodic_sharded_index_consistency_checker.cpp @@ -0,0 +1,200 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding + +#include "mongo/platform/basic.h" + +#include "mongo/db/s/periodic_sharded_index_consistency_checker.h" + +#include "mongo/db/auth/privilege.h" +#include "mongo/db/operation_context.h" +#include "mongo/db/s/sharding_runtime_d_params_gen.h" +#include "mongo/db/service_context.h" +#include "mongo/s/grid.h" +#include "mongo/s/query/cluster_aggregate.h" +#include "mongo/util/log.h" + +namespace mongo { + +namespace { + +const auto getPeriodicShardedIndexConsistencyChecker = + ServiceContext::declareDecoration<PeriodicShardedIndexConsistencyChecker>(); + +} // namespace + +PeriodicShardedIndexConsistencyChecker& PeriodicShardedIndexConsistencyChecker::get( + OperationContext* opCtx) { + return get(opCtx->getServiceContext()); +} + +PeriodicShardedIndexConsistencyChecker& PeriodicShardedIndexConsistencyChecker::get( + ServiceContext* serviceContext) { + return getPeriodicShardedIndexConsistencyChecker(serviceContext); +} + +long long PeriodicShardedIndexConsistencyChecker::getNumShardedCollsWithInconsistentIndexes() + const { + return _numShardedCollsWithInconsistentIndexes.load(); +} + +void PeriodicShardedIndexConsistencyChecker::_launchShardedIndexConsistencyChecker( + ServiceContext* serviceContext) { + auto periodicRunner = serviceContext->getPeriodicRunner(); + invariant(periodicRunner); + + PeriodicRunner::PeriodicJob job( + "PeriodicShardedIndexConsistencyChecker", + [this](Client* client) { + if (!enableShardedIndexConsistencyCheck.load()) { + return; + } + + log() << "Checking consistency of sharded collection indexes across the cluster"; + + const auto aggRequestBSON = fromjson( + "{pipeline: [{$indexStats: {}}," + "{$group: {_id: null, indexDoc: {$push: \"$$ROOT\"}, allShards: {$addToSet: " + "\"$shard\"}}}, " + "{$unwind: \"$indexDoc\"}, " + "{$group: {\"_id\": \"$indexDoc.name\", \"shards\": {$push: " + "\"$indexDoc.shard\"}, " + "\"specs\": {$addToSet: {$arrayToObject: {$setUnion: {$objectToArray: " + 
"\"$indexDoc.spec\"}}}}, " + "\"allShards\": {$first: \"$allShards\"}}}," + "{$addFields: {\"missingFromShards\": {$setDifference: [\"$allShards\", " + "\"$shards\"]}}}," + "{$match: {$expr: {$or: [{$gt: [{$size: \"$missingFromShards\"}, 0]}, {$gt: " + "[{$size: \"$specs\"}, 1]}]}}}," + "{$project: {_id: 0, indexName: \"$$ROOT._id\", specs: 1, missingFromShards: " + "1}}, {$limit: 1}], cursor: {}}"); + + auto uniqueOpCtx = client->makeOperationContext(); + auto opCtx = uniqueOpCtx.get(); + + try { + long long numShardedCollsWithInconsistentIndexes = 0; + auto collections = + uassertStatusOK(Grid::get(opCtx)->catalogClient()->getCollections( + opCtx, nullptr, nullptr, repl::ReadConcernLevel::kLocalReadConcern)); + + for (const auto& coll : collections) { + auto nss = coll.getNs(); + + // The only sharded collection in the config database with indexes is + // config.system.sessions. Unfortunately, the code path to run aggregation + // below would currently invariant if one of the targeted shards was the config + // server itself. + if (nss.isConfigDB()) { + continue; + } + + auto request = + uassertStatusOK(AggregationRequest::parseFromBSON(nss, aggRequestBSON)); + + for (int tries = 0;; ++tries) { + const bool canRetry = tries < kMaxNumStaleVersionRetries - 1; + + try { + BSONObjBuilder responseBuilder; + auto status = ClusterAggregate::runAggregate( + opCtx, + ClusterAggregate::Namespaces{nss, nss}, + request, + LiteParsedPipeline{request}, + PrivilegeVector(), + &responseBuilder); + + // Stop counting if the agg command failed for one of the collections + // to avoid recording a false count. 
+ uassertStatusOKWithContext(status, str::stream() << "nss " << nss); + + if (!responseBuilder.obj()["cursor"]["firstBatch"].Array().empty()) { + numShardedCollsWithInconsistentIndexes++; + } + break; + } catch (const ExceptionForCat<ErrorCategory::StaleShardVersionError>& ex) { + log() << "Attempt " << tries << " to check index consistency for " + << nss << " received StaleShardVersion error" << causedBy(ex); + if (canRetry) { + continue; + } + throw; + } + } + } + + log() << "Found " << numShardedCollsWithInconsistentIndexes + << " collections with inconsistent indexes"; + + // Update the count. + _numShardedCollsWithInconsistentIndexes.store( + numShardedCollsWithInconsistentIndexes); + } catch (const DBException& ex) { + log() << "Failed to check index consistency " << causedBy(ex.toStatus()); + } + }, + Milliseconds(shardedIndexConsistencyCheckIntervalMS)); + _shardedIndexConsistencyChecker = periodicRunner->makeJob(std::move(job)); + _shardedIndexConsistencyChecker.start(); +} + +void PeriodicShardedIndexConsistencyChecker::onStepUp(ServiceContext* serviceContext) { + if (!_isPrimary) { + _isPrimary = true; + if (!_shardedIndexConsistencyChecker.isValid()) { + // If this is the first time we're stepping up, start a thread to periodically check + // index consistency. + _launchShardedIndexConsistencyChecker(serviceContext); + } else { + // If we're stepping up again after having stepped down, just resume the existing task. + _shardedIndexConsistencyChecker.resume(); + } + } +} + +void PeriodicShardedIndexConsistencyChecker::onStepDown() { + if (_isPrimary) { + _isPrimary = false; + invariant(_shardedIndexConsistencyChecker.isValid()); + // We don't need to be checking index consistency unless we're primary. + _shardedIndexConsistencyChecker.pause(); + // Clear the counter to prevent a secondary from reporting an out-of-date count. 
+ _numShardedCollsWithInconsistentIndexes.store(0); + } +} + +void PeriodicShardedIndexConsistencyChecker::onShutDown() { + if (_shardedIndexConsistencyChecker.isValid()) { + _shardedIndexConsistencyChecker.stop(); + } +} + +} // namespace mongo diff --git a/src/mongo/db/s/periodic_sharded_index_consistency_checker.h b/src/mongo/db/s/periodic_sharded_index_consistency_checker.h new file mode 100644 index 0000000000000..0be604649fdb9 --- /dev/null +++ b/src/mongo/db/s/periodic_sharded_index_consistency_checker.h @@ -0,0 +1,95 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#pragma once + +#include "mongo/util/periodic_runner.h" + +namespace mongo { + +class OperationContext; +class ServiceContext; + +class PeriodicShardedIndexConsistencyChecker final { + PeriodicShardedIndexConsistencyChecker(const PeriodicShardedIndexConsistencyChecker&) = delete; + PeriodicShardedIndexConsistencyChecker& operator=( + const PeriodicShardedIndexConsistencyChecker&) = delete; + +public: + PeriodicShardedIndexConsistencyChecker() = default; + ~PeriodicShardedIndexConsistencyChecker() = default; + + PeriodicShardedIndexConsistencyChecker(PeriodicShardedIndexConsistencyChecker&& source) = + delete; + PeriodicShardedIndexConsistencyChecker& operator=( + PeriodicShardedIndexConsistencyChecker&& other) = delete; + + /** + * Obtains the service-wide PeriodicShardedIndexConsistencyChecker instance. + */ + static PeriodicShardedIndexConsistencyChecker& get(OperationContext* opCtx); + static PeriodicShardedIndexConsistencyChecker& get(ServiceContext* serviceContext); + + long long getNumShardedCollsWithInconsistentIndexes() const; + + /** + * Invoked when the config server primary enters the 'PRIMARY' state to + * trigger the start of the periodic sharded index consistency check. + */ + void onStepUp(ServiceContext* serviceContext); + + /** + * Invoked when this node which is currently serving as a 'PRIMARY' steps down. + * + * Pauses the periodic job until subsequent step up. This method might be called + * multiple times in succession, which is what happens as a result of incomplete + * transition to primary so it is resilient to that. + */ + void onStepDown(); + + /** + * Invoked when this node is shutting down. Stops the periodic job. + */ + void onShutDown(); + +private: + /** + * Initializes and starts the periodic job. + */ + void _launchShardedIndexConsistencyChecker(ServiceContext* serviceContext); + + bool _isPrimary{false}; + + // Periodic job for counting inconsistent indexes in the cluster. 
+ PeriodicJobAnchor _shardedIndexConsistencyChecker; + + // The latest count of sharded collections with inconsistent indexes. + AtomicWord<long long> _numShardedCollsWithInconsistentIndexes{0}; +}; +} // namespace mongo diff --git a/src/mongo/db/s/sharding_runtime_d_params.idl b/src/mongo/db/s/sharding_runtime_d_params.idl index ddb8230386c51..a430c9d654a46 100644 --- a/src/mongo/db/s/sharding_runtime_d_params.idl +++ b/src/mongo/db/s/sharding_runtime_d_params.idl @@ -94,6 +94,23 @@ server_parameters: disableResumableRangeDeleter: description: 'Disable the resumable range deleter and revert to prior behavior.' set_at: [startup, runtime] - cpp_vartype: AtomicWord<bool> + cpp_vartype: AtomicWord<bool> cpp_varname : disableResumableRangeDeleter default: false + + enableShardedIndexConsistencyCheck: + description: >- + Enable the periodic sharded index consistency check on the config server's primary. + The count of sharded collections with inconsistent indexes is exposed via the + 'shardedIndexConsistency' section in the serverStatus output. + set_at: [startup, runtime] + cpp_vartype: AtomicWord<bool> + cpp_varname: enableShardedIndexConsistencyCheck + default: true + + shardedIndexConsistencyCheckIntervalMS: + description: 'Time interval in milliseconds between subsequent index checks.' + set_at: [startup] + cpp_vartype: int + cpp_varname: shardedIndexConsistencyCheckIntervalMS + default: 600000