From 590f2bcfd35fc66e823e54f8b8247a40c37bbab6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Kara=C5=9B?= Date: Thu, 2 Oct 2025 16:46:26 +0200 Subject: [PATCH 01/12] added some comments to the code --- api/v1/mdb/shardedcluster.go | 1 + .../construct/database_construction.go | 1 + .../mongodbshardedcluster_controller.go | 51 +++++++++---------- .../tests/shardedcluster/sharded_cluster.py | 2 +- 4 files changed, 28 insertions(+), 27 deletions(-) diff --git a/api/v1/mdb/shardedcluster.go b/api/v1/mdb/shardedcluster.go index 8eff78cf4..3e03f55e9 100644 --- a/api/v1/mdb/shardedcluster.go +++ b/api/v1/mdb/shardedcluster.go @@ -98,5 +98,6 @@ func (s *ShardedClusterComponentSpec) GetClusterSpecItem(clusterName string) Clu } } // it should never occur - we preprocess all clusterSpecLists + // TODO: this can actually happen if desired cluster is removed, but the deployment state is not yet updated and contains the old cluster config panic(fmt.Errorf("clusterName %s not found in clusterSpecList", clusterName)) } diff --git a/controllers/operator/construct/database_construction.go b/controllers/operator/construct/database_construction.go index 19fea8c99..546e5a7d1 100644 --- a/controllers/operator/construct/database_construction.go +++ b/controllers/operator/construct/database_construction.go @@ -216,6 +216,7 @@ func ReplicaSetOptions(additionalOpts ...func(options *DatabaseStatefulSetOption // shardedOptions group the shared logic for creating Shard, Config servers, and mongos options func shardedOptions(cfg shardedOptionCfg, additionalOpts ...func(options *DatabaseStatefulSetOptions)) DatabaseStatefulSetOptions { + // TODO: we should read from r.desiredConfigServerConfiguration.GetClusterSpecItem clusterComponentSpec := cfg.componentSpec.GetClusterSpecItem(cfg.memberClusterName) statefulSetConfiguration := clusterComponentSpec.StatefulSetConfiguration var statefulSetSpecOverride *appsv1.StatefulSetSpec diff --git a/controllers/operator/mongodbshardedcluster_controller.go b/controllers/operator/mongodbshardedcluster_controller.go index abe384b16..92aa90165 100644 --- a/controllers/operator/mongodbshardedcluster_controller.go +++ b/controllers/operator/mongodbshardedcluster_controller.go @@ -132,16 +132,18 @@ func NewShardedClusterDeploymentState() *ShardedClusterDeploymentState { func (r *ShardedClusterReconcileHelper) initializeMemberClusters(globalMemberClustersMap map[string]client.Client, log *zap.SugaredLogger) error { mongoDB := r.sc shardsMap := r.desiredShardsConfiguration + configSrvSpecList := r.desiredConfigServerConfiguration.ClusterSpecList + mongosClusterSpecList := r.desiredMongosConfiguration.ClusterSpecList if mongoDB.Spec.IsMultiCluster() { if !multicluster.IsMemberClusterMapInitializedForMultiCluster(globalMemberClustersMap) { return xerrors.Errorf("member clusters have to be initialized for MultiCluster Sharded Cluster topology") } allReferencedClusterNamesMap := map[string]struct{}{} - for _, clusterSpecItem := range r.getConfigSrvClusterSpecList() { + for _, clusterSpecItem := range configSrvSpecList { allReferencedClusterNamesMap[clusterSpecItem.ClusterName] = struct{}{} } - for _, clusterSpecItem := range r.getMongosClusterSpecList() { + for _, clusterSpecItem := range mongosClusterSpecList { allReferencedClusterNamesMap[clusterSpecItem.ClusterName] = struct{}{} } for _, shardComponentSpec := range shardsMap { @@ -164,7 +166,7 @@ func (r *ShardedClusterReconcileHelper) initializeMemberClusters(globalMemberClu return 0 } } - r.configSrvMemberClusters = 
createMemberClusterListFromClusterSpecList(r.getConfigSrvClusterSpecList(), globalMemberClustersMap, log, r.deploymentState.ClusterMapping, configSrvGetLastAppliedMembersFunc, false) + r.configSrvMemberClusters = createMemberClusterListFromClusterSpecList(configSrvSpecList, globalMemberClustersMap, log, r.deploymentState.ClusterMapping, configSrvGetLastAppliedMembersFunc, false) mongosGetLastAppliedMembersFunc := func(memberClusterName string) int { if count, ok := r.deploymentState.Status.SizeStatusInClusters.MongosCountInClusters[memberClusterName]; ok { @@ -173,7 +175,7 @@ func (r *ShardedClusterReconcileHelper) initializeMemberClusters(globalMemberClu return 0 } } - r.mongosMemberClusters = createMemberClusterListFromClusterSpecList(r.getMongosClusterSpecList(), globalMemberClustersMap, log, r.deploymentState.ClusterMapping, mongosGetLastAppliedMembersFunc, false) + r.mongosMemberClusters = createMemberClusterListFromClusterSpecList(mongosClusterSpecList, globalMemberClustersMap, log, r.deploymentState.ClusterMapping, mongosGetLastAppliedMembersFunc, false) r.shardsMemberClustersMap, r.allShardsMemberClusters = r.createShardsMemberClusterLists(shardsMap, globalMemberClustersMap, log, r.deploymentState, false) } else { r.shardsMemberClustersMap, r.allShardsMemberClusters = r.createShardsMemberClusterLists(shardsMap, globalMemberClustersMap, log, r.deploymentState, true) @@ -961,6 +963,7 @@ func (r *ShardedClusterReconcileHelper) Reconcile(ctx context.Context, log *zap. if sc.Spec.ShardCount < r.deploymentState.Status.ShardCount { r.removeUnusedStatefulsets(ctx, sc, log) } + // TODO: we should also remove unused configSrv and mongos statefulsets annotationsToAdd, err := getAnnotationsForResource(sc) if err != nil { @@ -1233,16 +1236,16 @@ func anyStatefulSetNeedsToPublishStateToOM(ctx context.Context, sc mdbv1.MongoDB // This includes the Mongos, the Config Server and all Shards func (r *ShardedClusterReconcileHelper) getAllConfigs(ctx context.Context, sc mdbv1.MongoDB, opts deploymentOptions, log *zap.SugaredLogger) []func(mdb mdbv1.MongoDB) construct.DatabaseStatefulSetOptions { allConfigs := make([]func(mdb mdbv1.MongoDB) construct.DatabaseStatefulSetOptions, 0) - for shardIdx, shardSpec := range r.desiredShardsConfiguration { + for shardIdx := range r.desiredShardsConfiguration { for _, memberCluster := range getHealthyMemberClusters(r.shardsMemberClustersMap[shardIdx]) { - allConfigs = append(allConfigs, r.getShardOptions(ctx, sc, shardIdx, opts, log, shardSpec, memberCluster)) + allConfigs = append(allConfigs, r.getShardOptions(ctx, sc, shardIdx, opts, log, memberCluster)) } } for _, memberCluster := range getHealthyMemberClusters(r.configSrvMemberClusters) { - allConfigs = append(allConfigs, r.getConfigServerOptions(ctx, sc, opts, log, r.desiredConfigServerConfiguration, memberCluster)) + allConfigs = append(allConfigs, r.getConfigServerOptions(ctx, sc, opts, log, memberCluster)) } for _, memberCluster := range getHealthyMemberClusters(r.mongosMemberClusters) { - allConfigs = append(allConfigs, r.getMongosOptions(ctx, sc, opts, log, r.desiredMongosConfiguration, memberCluster)) + allConfigs = append(allConfigs, r.getMongosOptions(ctx, sc, opts, log, memberCluster)) } return allConfigs } @@ -1367,7 +1370,7 @@ func (r *ShardedClusterReconcileHelper) createKubernetesResources(ctx context.Co func (r *ShardedClusterReconcileHelper) createOrUpdateMongos(ctx context.Context, s *mdbv1.MongoDB, opts deploymentOptions, log *zap.SugaredLogger) workflow.Status { // we deploy changes to sts to 
all mongos in all clusters for _, memberCluster := range getHealthyMemberClusters(r.mongosMemberClusters) { - mongosOpts := r.getMongosOptions(ctx, *s, opts, log, r.desiredMongosConfiguration, memberCluster) + mongosOpts := r.getMongosOptions(ctx, *s, opts, log, memberCluster) mongosSts := construct.DatabaseStatefulSet(*s, mongosOpts, log) if err := create.DatabaseInKubernetes(ctx, memberCluster.Client, *s, mongosSts, mongosOpts, log); err != nil { return workflow.Failed(xerrors.Errorf("Failed to create Mongos Stateful Set: %w", err)) @@ -1393,7 +1396,7 @@ func (r *ShardedClusterReconcileHelper) createOrUpdateShards(ctx context.Context // in single cluster sts name == shard name // in multi cluster sts name contains cluster index, but shard name does not (it's a replicaset name) shardsNames[shardIdx] = s.ShardRsName(shardIdx) - shardOpts := r.getShardOptions(ctx, *s, shardIdx, opts, log, r.desiredShardsConfiguration[shardIdx], memberCluster) + shardOpts := r.getShardOptions(ctx, *s, shardIdx, opts, log, memberCluster) shardSts := construct.DatabaseStatefulSet(*s, shardOpts, log) if workflowStatus := r.handlePVCResize(ctx, memberCluster, &shardSts, log); !workflowStatus.IsOK() { @@ -1437,7 +1440,7 @@ func (r *ShardedClusterReconcileHelper) createOrUpdateConfigServers(ctx context. // for ScalingFirstTime, which is iterating over all member clusters internally anyway configSrvScalingFirstTime := r.GetConfigSrvScaler(r.configSrvMemberClusters[0]).ScalingFirstTime() for _, memberCluster := range getHealthyMemberClusters(r.configSrvMemberClusters) { - configSrvOpts := r.getConfigServerOptions(ctx, *s, opts, log, r.desiredConfigServerConfiguration, memberCluster) + configSrvOpts := r.getConfigServerOptions(ctx, *s, opts, log, memberCluster) configSrvSts := construct.DatabaseStatefulSet(*s, configSrvOpts, log) if workflowStatus := r.handlePVCResize(ctx, memberCluster, &configSrvSts, log); !workflowStatus.IsOK() { @@ -1908,7 +1911,7 @@ func (r *ShardedClusterReconcileHelper) publishDeployment(ctx context.Context, c // We take here the first cluster arbitrarily because the options are used for irrelevant stuff below, same for // config servers and shards below mongosMemberCluster := r.mongosMemberClusters[0] - mongosOptionsFunc := r.getMongosOptions(ctx, *sc, *opts, log, r.desiredMongosConfiguration, mongosMemberCluster) + mongosOptionsFunc := r.getMongosOptions(ctx, *sc, *opts, log, mongosMemberCluster) mongosOptions := mongosOptionsFunc(*r.sc) mongosInternalClusterPath := fmt.Sprintf("%s/%s", util.InternalClusterAuthMountPath, mongosOptions.InternalClusterHash) mongosMemberCertPath := fmt.Sprintf("%s/%s", util.TLSCertMountPath, mongosOptions.CertificateHash) @@ -1919,7 +1922,7 @@ func (r *ShardedClusterReconcileHelper) publishDeployment(ctx context.Context, c // Config server configSrvMemberCluster := r.configSrvMemberClusters[0] - configSrvOptionsFunc := r.getConfigServerOptions(ctx, *sc, *opts, log, r.desiredConfigServerConfiguration, configSrvMemberCluster) + configSrvOptionsFunc := r.getConfigServerOptions(ctx, *sc, *opts, log, configSrvMemberCluster) configSrvOptions := configSrvOptionsFunc(*r.sc) configSrvInternalClusterPath := fmt.Sprintf("%s/%s", util.InternalClusterAuthMountPath, configSrvOptions.InternalClusterHash) @@ -1940,7 +1943,7 @@ func (r *ShardedClusterReconcileHelper) publishDeployment(ctx context.Context, c shards := make([]om.ReplicaSetWithProcesses, sc.Spec.ShardCount) var shardInternalClusterPaths []string for shardIdx := 0; shardIdx < r.sc.Spec.ShardCount; shardIdx++ { - 
shardOptionsFunc := r.getShardOptions(ctx, *sc, shardIdx, *opts, log, r.desiredShardsConfiguration[shardIdx], r.shardsMemberClustersMap[shardIdx][0]) + shardOptionsFunc := r.getShardOptions(ctx, *sc, shardIdx, *opts, log, r.shardsMemberClustersMap[shardIdx][0]) shardOptions := shardOptionsFunc(*r.sc) shardInternalClusterPaths = append(shardInternalClusterPaths, fmt.Sprintf("%s/%s", util.InternalClusterAuthMountPath, shardOptions.InternalClusterHash)) shardMemberCertPath := fmt.Sprintf("%s/%s", util.TLSCertMountPath, shardOptions.CertificateHash) @@ -2271,7 +2274,7 @@ func buildReplicaSetFromProcesses(name string, members []om.Process, mdb *mdbv1. } // getConfigServerOptions returns the Options needed to build the StatefulSet for the config server. -func (r *ShardedClusterReconcileHelper) getConfigServerOptions(ctx context.Context, sc mdbv1.MongoDB, opts deploymentOptions, log *zap.SugaredLogger, configSrvSpec *mdbv1.ShardedClusterComponentSpec, memberCluster multicluster.MemberCluster) func(mdb mdbv1.MongoDB) construct.DatabaseStatefulSetOptions { +func (r *ShardedClusterReconcileHelper) getConfigServerOptions(ctx context.Context, sc mdbv1.MongoDB, opts deploymentOptions, log *zap.SugaredLogger, memberCluster multicluster.MemberCluster) func(mdb mdbv1.MongoDB) construct.DatabaseStatefulSetOptions { certSecretName := sc.GetSecurity().MemberCertificateSecretName(sc.ConfigRsName()) internalClusterSecretName := sc.GetSecurity().InternalClusterAuthSecretName(sc.ConfigRsName()) @@ -2282,9 +2285,7 @@ func (r *ShardedClusterReconcileHelper) getConfigServerOptions(ctx context.Conte databaseSecretPath = r.commonController.VaultClient.DatabaseSecretPath() } - return construct.ConfigServerOptions( - configSrvSpec, - memberCluster.Name, + return construct.ConfigServerOptions(r.desiredConfigServerConfiguration, memberCluster.Name, Replicas(scale.ReplicasThisReconciliation(r.GetConfigSrvScaler(memberCluster))), StatefulSetNameOverride(r.GetConfigSrvStsName(memberCluster)), ServiceName(r.GetConfigSrvServiceName(memberCluster)), @@ -2295,7 +2296,7 @@ func (r *ShardedClusterReconcileHelper) getConfigServerOptions(ctx context.Conte InternalClusterHash(enterprisepem.ReadHashFromSecret(ctx, r.commonController.SecretClient, sc.Namespace, internalClusterSecretName, databaseSecretPath, log)), PrometheusTLSCertHash(opts.prometheusCertHash), WithVaultConfig(vaultConfig), - WithAdditionalMongodConfig(configSrvSpec.GetAdditionalMongodConfig()), + WithAdditionalMongodConfig(r.desiredConfigServerConfiguration.GetAdditionalMongodConfig()), WithDefaultConfigSrvStorageSize(), WithStsLabels(r.statefulsetLabels()), WithInitDatabaseNonStaticImage(images.ContainerImage(r.imageUrls, util.InitDatabaseImageUrlEnv, r.initDatabaseNonStaticImageVersion)), @@ -2306,7 +2307,7 @@ func (r *ShardedClusterReconcileHelper) getConfigServerOptions(ctx context.Conte } // getMongosOptions returns the Options needed to build the StatefulSet for the mongos. 
-func (r *ShardedClusterReconcileHelper) getMongosOptions(ctx context.Context, sc mdbv1.MongoDB, opts deploymentOptions, log *zap.SugaredLogger, mongosSpec *mdbv1.ShardedClusterComponentSpec, memberCluster multicluster.MemberCluster) func(mdb mdbv1.MongoDB) construct.DatabaseStatefulSetOptions { +func (r *ShardedClusterReconcileHelper) getMongosOptions(ctx context.Context, sc mdbv1.MongoDB, opts deploymentOptions, log *zap.SugaredLogger, memberCluster multicluster.MemberCluster) func(mdb mdbv1.MongoDB) construct.DatabaseStatefulSetOptions { certSecretName := sc.GetSecurity().MemberCertificateSecretName(sc.MongosRsName()) internalClusterSecretName := sc.GetSecurity().InternalClusterAuthSecretName(sc.MongosRsName()) @@ -2315,9 +2316,7 @@ func (r *ShardedClusterReconcileHelper) getMongosOptions(ctx context.Context, sc vaultConfig = r.commonController.VaultClient.VaultConfig } - return construct.MongosOptions( - mongosSpec, - memberCluster.Name, + return construct.MongosOptions(r.desiredMongosConfiguration, memberCluster.Name, Replicas(scale.ReplicasThisReconciliation(r.GetMongosScaler(memberCluster))), StatefulSetNameOverride(r.GetMongosStsName(memberCluster)), PodEnvVars(opts.podEnvVars), @@ -2327,7 +2326,7 @@ func (r *ShardedClusterReconcileHelper) getMongosOptions(ctx context.Context, sc InternalClusterHash(enterprisepem.ReadHashFromSecret(ctx, r.commonController.SecretClient, sc.Namespace, internalClusterSecretName, vaultConfig.DatabaseSecretPath, log)), PrometheusTLSCertHash(opts.prometheusCertHash), WithVaultConfig(vaultConfig), - WithAdditionalMongodConfig(sc.Spec.MongosSpec.GetAdditionalMongodConfig()), + WithAdditionalMongodConfig(r.desiredMongosConfiguration.GetAdditionalMongodConfig()), WithStsLabels(r.statefulsetLabels()), WithInitDatabaseNonStaticImage(images.ContainerImage(r.imageUrls, util.InitDatabaseImageUrlEnv, r.initDatabaseNonStaticImageVersion)), WithDatabaseNonStaticImage(images.ContainerImage(r.imageUrls, util.NonStaticDatabaseEnterpriseImage, r.databaseNonStaticImageVersion)), @@ -2337,7 +2336,7 @@ func (r *ShardedClusterReconcileHelper) getMongosOptions(ctx context.Context, sc } // getShardOptions returns the Options needed to build the StatefulSet for a given shard. 
-func (r *ShardedClusterReconcileHelper) getShardOptions(ctx context.Context, sc mdbv1.MongoDB, shardNum int, opts deploymentOptions, log *zap.SugaredLogger, shardSpec *mdbv1.ShardedClusterComponentSpec, memberCluster multicluster.MemberCluster) func(mdb mdbv1.MongoDB) construct.DatabaseStatefulSetOptions { +func (r *ShardedClusterReconcileHelper) getShardOptions(ctx context.Context, sc mdbv1.MongoDB, shardNum int, opts deploymentOptions, log *zap.SugaredLogger, memberCluster multicluster.MemberCluster) func(mdb mdbv1.MongoDB) construct.DatabaseStatefulSetOptions { certSecretName := sc.GetSecurity().MemberCertificateSecretName(sc.ShardRsName(shardNum)) internalClusterSecretName := sc.GetSecurity().InternalClusterAuthSecretName(sc.ShardRsName(shardNum)) @@ -2358,7 +2357,7 @@ func (r *ShardedClusterReconcileHelper) getShardOptions(ctx context.Context, sc InternalClusterHash(enterprisepem.ReadHashFromSecret(ctx, r.commonController.SecretClient, sc.Namespace, internalClusterSecretName, databaseSecretPath, log)), PrometheusTLSCertHash(opts.prometheusCertHash), WithVaultConfig(vaultConfig), - WithAdditionalMongodConfig(shardSpec.GetAdditionalMongodConfig()), + WithAdditionalMongodConfig(r.desiredShardsConfiguration[shardNum].GetAdditionalMongodConfig()), WithStsLabels(r.statefulsetLabels()), WithInitDatabaseNonStaticImage(images.ContainerImage(r.imageUrls, util.InitDatabaseImageUrlEnv, r.initDatabaseNonStaticImageVersion)), WithDatabaseNonStaticImage(images.ContainerImage(r.imageUrls, util.NonStaticDatabaseEnterpriseImage, r.databaseNonStaticImageVersion)), diff --git a/docker/mongodb-kubernetes-tests/tests/shardedcluster/sharded_cluster.py b/docker/mongodb-kubernetes-tests/tests/shardedcluster/sharded_cluster.py index 650bc9275..8ef232656 100644 --- a/docker/mongodb-kubernetes-tests/tests/shardedcluster/sharded_cluster.py +++ b/docker/mongodb-kubernetes-tests/tests/shardedcluster/sharded_cluster.py @@ -34,7 +34,7 @@ def sc(namespace: str, custom_mdb_version: str) -> MongoDB: resource=resource, shard_members_array=[1, 1, 1], mongos_members_array=[1, 1, None], - configsrv_members_array=[1, 1, 1], + configsrv_members_array=[3, 1, 0], ) return resource From f12a39c0708f6e308e93f22c47f92cb3168219b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Kara=C5=9B?= Date: Fri, 3 Oct 2025 14:09:16 +0200 Subject: [PATCH 02/12] Add comment for appDB similar issue --- api/v1/om/appdb_types.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/api/v1/om/appdb_types.go b/api/v1/om/appdb_types.go index 2a1102af9..2c4b28ebb 100644 --- a/api/v1/om/appdb_types.go +++ b/api/v1/om/appdb_types.go @@ -494,6 +494,9 @@ func (m *AppDBSpec) GetMemberClusterSpecByName(memberClusterName string) mdbv1.C // In case the member cluster is not found in the cluster spec list, we return an empty ClusterSpecItem // with 0 members to handle the case of removing a cluster from the spec list without a panic. + // TODO: this is not ideal, because we don't consider other fields that were removed i.e.MemberConfig. + // When scaling down we should consider the full spec that was used to create the cluster. 
+ // Create a ticket return mdbv1.ClusterSpecItem{ ClusterName: memberClusterName, Members: 0, From 22d4993c3165036004fd13c9ba20a3dd1d51a7d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Kara=C5=9B?= Date: Fri, 3 Oct 2025 14:09:39 +0200 Subject: [PATCH 03/12] Add sharded cluster validation --- api/v1/mdb/mongodb_validation.go | 2 + api/v1/mdb/sharded_cluster_validation.go | 32 +++++++++++ api/v1/mdb/sharded_cluster_validation_test.go | 54 +++++++++++++++++++ api/v1/mdb/shardedcluster.go | 21 ++++++-- 4 files changed, 105 insertions(+), 4 deletions(-) diff --git a/api/v1/mdb/mongodb_validation.go b/api/v1/mdb/mongodb_validation.go index d33d3167c..fee442ea8 100644 --- a/api/v1/mdb/mongodb_validation.go +++ b/api/v1/mdb/mongodb_validation.go @@ -472,6 +472,8 @@ func (m *MongoDB) RunValidations(old *MongoDB) []v1.ValidationResult { } } } + + updateValidators = append(updateValidators, ShardedClusterMultiUpdateValidators()...) } else { for _, validator := range ShardedClusterSingleValidators() { res := validator(*m) diff --git a/api/v1/mdb/sharded_cluster_validation.go b/api/v1/mdb/sharded_cluster_validation.go index 2b24a597c..682774e9a 100644 --- a/api/v1/mdb/sharded_cluster_validation.go +++ b/api/v1/mdb/sharded_cluster_validation.go @@ -43,6 +43,14 @@ func ShardedClusterMultiValidators() []func(m MongoDB) []v1.ValidationResult { } } +func ShardedClusterMultiUpdateValidators() []func(newObj, oldObj MongoDbSpec) v1.ValidationResult { + return []func(newObj, oldObj MongoDbSpec) v1.ValidationResult{ + nonEmptyShardClusterSpecItemRemoval, + nonEmptyConfigSrvClusterSpecItemRemoval, + nonEmptyMongosClusterSpecItemRemoval, + } +} + // This applies to any topology func shardCountSpecified(m MongoDB) v1.ValidationResult { if m.Spec.ShardCount == 0 { @@ -374,3 +382,27 @@ func validateMemberClusterIsSubsetOfKubeConfig(m MongoDB) v1.ValidationResult { return v1.ValidationSuccess() } + +func nonEmptyShardClusterSpecItemRemoval(newObj, oldObj MongoDbSpec) v1.ValidationResult { + return nonEmptyClusterSpecItemRemoval("shard", newObj.ShardSpec, oldObj.ShardSpec) +} + +func nonEmptyConfigSrvClusterSpecItemRemoval(newObj, oldObj MongoDbSpec) v1.ValidationResult { + return nonEmptyClusterSpecItemRemoval("configSrv", newObj.ConfigSrvSpec, oldObj.ConfigSrvSpec) +} + +func nonEmptyMongosClusterSpecItemRemoval(newObj, oldObj MongoDbSpec) v1.ValidationResult { + return nonEmptyClusterSpecItemRemoval("mongos", newObj.MongosSpec, oldObj.MongosSpec) +} + +// This validation blocks removing ClusterSpecItem if members count is more than zero (ShardedCluster). If user wants to remove +// the cluster, they should first scale down members to zero and then remove the ClusterSpecItem. +func nonEmptyClusterSpecItemRemoval(kind string, newClusterSpec, oldClusterSpec *ShardedClusterComponentSpec) v1.ValidationResult { + for _, oldClusterSpecItem := range oldClusterSpec.ClusterSpecList { + if !newClusterSpec.ClusterSpecItemExists(oldClusterSpecItem.ClusterName) && oldClusterSpecItem.Members > 0 { + return v1.ValidationError("Cannot remove %s cluster %s with non-zero members count. 
Please scale down members to zero first", kind, oldClusterSpecItem.ClusterName) + } + } + + return v1.ValidationSuccess() +} diff --git a/api/v1/mdb/sharded_cluster_validation_test.go b/api/v1/mdb/sharded_cluster_validation_test.go index 6918c4d16..132befc30 100644 --- a/api/v1/mdb/sharded_cluster_validation_test.go +++ b/api/v1/mdb/sharded_cluster_validation_test.go @@ -668,3 +668,57 @@ users: t.Setenv(multicluster.KubeConfigPathEnv, file.Name()) return file } + +func TestNonEmptyShardClusterSpecItemRemoval(t *testing.T) { + scOld := NewDefaultMultiShardedClusterBuilder().Build() + + scNew := NewDefaultMultiShardedClusterBuilder().Build() + // Removing `test-cluster-0` with 2 members from the ClusterSpecList + scNew.Spec.ShardSpec.ClusterSpecList = ClusterSpecList{ + { + ClusterName: "test-cluster-1", + Members: 3, + }, + } + + _, err := scNew.ValidateUpdate(scOld) + + require.Error(t, err) + assert.Equal(t, "Cannot remove shard cluster test-cluster-0 with non-zero members count. Please scale down members to zero first", err.Error()) +} + +func TestNonEmptyConfigSrcClusterSpecItemRemoval(t *testing.T) { + scOld := NewDefaultMultiShardedClusterBuilder().Build() + + scNew := NewDefaultMultiShardedClusterBuilder().Build() + // Removing `test-cluster-0` with 2 members from the ClusterSpecList + scNew.Spec.ConfigSrvSpec.ClusterSpecList = ClusterSpecList{ + { + ClusterName: "test-cluster-1", + Members: 3, + }, + } + + _, err := scNew.ValidateUpdate(scOld) + + require.Error(t, err) + assert.Equal(t, "Cannot remove configSrv cluster test-cluster-0 with non-zero members count. Please scale down members to zero first", err.Error()) +} + +func TestNonEmptyMongosClusterSpecItemRemoval(t *testing.T) { + scOld := NewDefaultMultiShardedClusterBuilder().Build() + + scNew := NewDefaultMultiShardedClusterBuilder().Build() + // Removing `test-cluster-0` with 2 members from the ClusterSpecList + scNew.Spec.MongosSpec.ClusterSpecList = ClusterSpecList{ + { + ClusterName: "test-cluster-1", + Members: 3, + }, + } + + _, err := scNew.ValidateUpdate(scOld) + + require.Error(t, err) + assert.Equal(t, "Cannot remove mongos cluster test-cluster-0 with non-zero members count. 
Please scale down members to zero first", err.Error()) +} diff --git a/api/v1/mdb/shardedcluster.go b/api/v1/mdb/shardedcluster.go index 3e03f55e9..18b55b9b3 100644 --- a/api/v1/mdb/shardedcluster.go +++ b/api/v1/mdb/shardedcluster.go @@ -91,13 +91,26 @@ func (s *ShardedClusterComponentSpec) GetAgentConfig() *AgentConfig { return &s.Agent } +func (s *ShardedClusterComponentSpec) ClusterSpecItemExists(clusterName string) bool { + return s.getClusterSpecItemOrNil(clusterName) != nil +} + func (s *ShardedClusterComponentSpec) GetClusterSpecItem(clusterName string) ClusterSpecItem { - for i := range s.ClusterSpecList { - if s.ClusterSpecList[i].ClusterName == clusterName { - return s.ClusterSpecList[i] - } + if clusterSpecItem := s.getClusterSpecItemOrNil(clusterName); clusterSpecItem != nil { + return *clusterSpecItem } + // it should never occur - we preprocess all clusterSpecLists // TODO: this can actually happen if desired cluster is removed, but the deployment state is not yet updated and contains the old cluster config panic(fmt.Errorf("clusterName %s not found in clusterSpecList", clusterName)) } + +func (s *ShardedClusterComponentSpec) getClusterSpecItemOrNil(clusterName string) *ClusterSpecItem { + for i := range s.ClusterSpecList { + if s.ClusterSpecList[i].ClusterName == clusterName { + return &s.ClusterSpecList[i] + } + } + + return nil +} From 0e4c8d55673a020114ab5595088ae1c049591940 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Kara=C5=9B?= Date: Fri, 3 Oct 2025 16:17:04 +0200 Subject: [PATCH 04/12] Semi working unit tests --- api/v1/mdb/mongodb_types.go | 43 ++++ .../mongodbshardedcluster_controller.go | 110 ++++----- ...odbshardedcluster_controller_multi_test.go | 212 ++++++++++++++++-- 3 files changed, 279 insertions(+), 86 deletions(-) diff --git a/api/v1/mdb/mongodb_types.go b/api/v1/mdb/mongodb_types.go index 91765cc81..f6772a2e5 100644 --- a/api/v1/mdb/mongodb_types.go +++ b/api/v1/mdb/mongodb_types.go @@ -25,6 +25,7 @@ import ( "github.com/mongodb/mongodb-kubernetes/pkg/dns" "github.com/mongodb/mongodb-kubernetes/pkg/fcv" "github.com/mongodb/mongodb-kubernetes/pkg/kube" + "github.com/mongodb/mongodb-kubernetes/pkg/multicluster" "github.com/mongodb/mongodb-kubernetes/pkg/util" "github.com/mongodb/mongodb-kubernetes/pkg/util/env" "github.com/mongodb/mongodb-kubernetes/pkg/util/stringutil" @@ -1665,6 +1666,48 @@ func (m *MongoDbSpec) IsMultiCluster() bool { return m.GetTopology() == ClusterTopologyMultiCluster } +func (m *MongoDbSpec) GetShardClusterSpecList() ClusterSpecList { + if m.IsMultiCluster() { + return m.ShardSpec.ClusterSpecList + } else { + return ClusterSpecList{ + { + ClusterName: multicluster.LegacyCentralClusterName, + Members: m.MongodsPerShardCount, + MemberConfig: m.MemberConfig, + }, + } + } +} + +func (m *MongoDbSpec) GetMongosClusterSpecList() ClusterSpecList { + if m.IsMultiCluster() { + return m.MongosSpec.ClusterSpecList + } else { + return ClusterSpecList{ + { + ClusterName: multicluster.LegacyCentralClusterName, + Members: m.MongosCount, + ExternalAccessConfiguration: m.ExternalAccessConfiguration, + }, + } + } +} + +func (m *MongoDbSpec) GetConfigSrvClusterSpecList() ClusterSpecList { + if m.IsMultiCluster() { + return m.ConfigSrvSpec.ClusterSpecList + } else { + return ClusterSpecList{ + { + ClusterName: multicluster.LegacyCentralClusterName, + Members: m.ConfigServerCount, + MemberConfig: m.MemberConfig, + }, + } + } +} + type MongoDBConnectionStringBuilder struct { MongoDB hostnames []string diff --git 
a/controllers/operator/mongodbshardedcluster_controller.go b/controllers/operator/mongodbshardedcluster_controller.go index 92aa90165..6909ebc2d 100644 --- a/controllers/operator/mongodbshardedcluster_controller.go +++ b/controllers/operator/mongodbshardedcluster_controller.go @@ -292,63 +292,13 @@ func (r *ShardedClusterReconcileHelper) getShardNameToShardIdxMap() map[string]i return mapping } -func (r *ShardedClusterReconcileHelper) getShardClusterSpecList() mdbv1.ClusterSpecList { - spec := r.sc.Spec - if spec.IsMultiCluster() { - return spec.ShardSpec.ClusterSpecList - } else { - return mdbv1.ClusterSpecList{ - { - ClusterName: multicluster.LegacyCentralClusterName, - Members: spec.MongodsPerShardCount, - MemberConfig: spec.MemberConfig, - }, - } - } -} - -func (r *ShardedClusterReconcileHelper) getMongosClusterSpecList() mdbv1.ClusterSpecList { - spec := r.sc.Spec - if spec.IsMultiCluster() { - // TODO return merged, desired mongos configuration - return spec.MongosSpec.ClusterSpecList - } else { - return mdbv1.ClusterSpecList{ - { - ClusterName: multicluster.LegacyCentralClusterName, - Members: spec.MongosCount, - ExternalAccessConfiguration: spec.ExternalAccessConfiguration, - }, - } - } -} - -func (r *ShardedClusterReconcileHelper) getConfigSrvClusterSpecList() mdbv1.ClusterSpecList { - spec := r.sc.Spec - if spec.IsMultiCluster() { - return spec.ConfigSrvSpec.ClusterSpecList - } else { - return mdbv1.ClusterSpecList{ - { - ClusterName: multicluster.LegacyCentralClusterName, - Members: spec.ConfigServerCount, - MemberConfig: spec.MemberConfig, - }, - } - } -} - // prepareDesiredShardsConfiguration calculates full expected configuration of sharded cluster spec resource. // It returns map of each shard (by index) with its configuration over all clusters and applying all pods spec overrides. // In other words, this function is rendering final configuration of each shard over all member clusters applying all override logic. // The reconciler implementation should refer to this structure only without taking into consideration complexities of MongoDbSpec wrt sharded clusters. func (r *ShardedClusterReconcileHelper) prepareDesiredShardsConfiguration() map[int]*mdbv1.ShardedClusterComponentSpec { spec := r.sc.Spec.DeepCopy() - // We initialize ClusterSpecList to contain a single legacy cluster in case of SingleCluster mode. - if spec.ShardSpec == nil { - spec.ShardSpec = &mdbv1.ShardedClusterComponentSpec{} - } - spec.ShardSpec.ClusterSpecList = r.getShardClusterSpecList() + // We don't need to do the same for shardOverrides for single-cluster as shardOverrides[].ClusterSpecList can be set only for Multi-Cluster mode. // And we don't need that artificial legacy cluster as for single-cluster all necessary configuration is defined top-level. 
@@ -359,8 +309,9 @@ func (r *ShardedClusterReconcileHelper) prepareDesiredShardsConfiguration() map[ for shardIdx := 0; shardIdx < max(spec.ShardCount, r.deploymentState.Status.ShardCount); shardIdx++ { topLevelPersistenceOverride, topLevelPodSpecOverride := getShardTopLevelOverrides(spec, shardIdx) + shardComponentSpec := *spec.ShardSpec.DeepCopy() - shardComponentSpec.ClusterSpecList = processClusterSpecList(shardComponentSpec.ClusterSpecList, topLevelPodSpecOverride, topLevelPersistenceOverride) + shardComponentSpec.ClusterSpecList = processClusterSpecList(spec.GetShardClusterSpecList(), topLevelPodSpecOverride, topLevelPersistenceOverride) shardComponentSpecs[shardIdx] = &shardComponentSpec } @@ -559,29 +510,21 @@ func extractOverridesFromPodSpec(podSpec *mdbv1.MongoDbPodSpec) (*corev1.PodTemp // We share the same logic and data structures used for Config Server, although some fields are not relevant for mongos // e.g MemberConfig. They will simply be ignored when the database is constructed func (r *ShardedClusterReconcileHelper) prepareDesiredMongosConfiguration() *mdbv1.ShardedClusterComponentSpec { - // We initialize ClusterSpecList to contain a single legacy cluster in case of SingleCluster mode. - spec := r.sc.Spec.DeepCopy() - if spec.MongosSpec == nil { - spec.MongosSpec = &mdbv1.ShardedClusterComponentSpec{} - } - spec.MongosSpec.ClusterSpecList = r.getMongosClusterSpecList() - topLevelPodSpecOverride, topLevelPersistenceOverride := extractOverridesFromPodSpec(spec.MongosPodSpec) - mongosComponentSpec := spec.MongosSpec.DeepCopy() - mongosComponentSpec.ClusterSpecList = processClusterSpecList(mongosComponentSpec.ClusterSpecList, topLevelPodSpecOverride, topLevelPersistenceOverride) + topLevelPodSpecOverride, topLevelPersistenceOverride := extractOverridesFromPodSpec(r.sc.Spec.MongosPodSpec) + + mongosComponentSpec := r.sc.Spec.MongosSpec.DeepCopy() + mongosComponentSpec.ClusterSpecList = processClusterSpecList(r.sc.Spec.GetMongosClusterSpecList(), topLevelPodSpecOverride, topLevelPersistenceOverride) + return mongosComponentSpec } // prepareDesiredConfigServerConfiguration works the same way as prepareDesiredMongosConfiguration, but for config server func (r *ShardedClusterReconcileHelper) prepareDesiredConfigServerConfiguration() *mdbv1.ShardedClusterComponentSpec { - // We initialize ClusterSpecList to contain a single legacy cluster in case of SingleCluster mode. - spec := r.sc.Spec.DeepCopy() - if spec.ConfigSrvSpec == nil { - spec.ConfigSrvSpec = &mdbv1.ShardedClusterComponentSpec{} - } - spec.ConfigSrvSpec.ClusterSpecList = r.getConfigSrvClusterSpecList() - topLevelPodSpecOverride, topLevelPersistenceOverride := extractOverridesFromPodSpec(spec.ConfigSrvPodSpec) - configSrvComponentSpec := spec.ConfigSrvSpec.DeepCopy() - configSrvComponentSpec.ClusterSpecList = processClusterSpecList(configSrvComponentSpec.ClusterSpecList, topLevelPodSpecOverride, topLevelPersistenceOverride) + topLevelPodSpecOverride, topLevelPersistenceOverride := extractOverridesFromPodSpec(r.sc.Spec.ConfigSrvPodSpec) + + configSrvComponentSpec := r.sc.Spec.ConfigSrvSpec.DeepCopy() + configSrvComponentSpec.ClusterSpecList = processClusterSpecList(r.sc.Spec.GetConfigSrvClusterSpecList(), topLevelPodSpecOverride, topLevelPersistenceOverride) + return configSrvComponentSpec } @@ -889,6 +832,11 @@ func (r *ShardedClusterReconcileHelper) Reconcile(ctx context.Context, log *zap. 
return r.updateStatus(ctx, sc, workflow.Failed(err), log) } + // TODO: add comment why this was added + if err := r.blockNonEmptyClusterSpecItemRemoval(); err != nil { + return r.updateStatus(ctx, sc, workflow.Failed(err), log) + } + if !architectures.IsRunningStaticArchitecture(sc.Annotations) { agents.UpgradeAllIfNeeded(ctx, agents.ClientSecret{Client: r.commonController.client, SecretClient: r.commonController.SecretClient}, r.omConnectionFactory, GetWatchedNamespace(), false) } @@ -2976,6 +2924,28 @@ func (r *ShardedClusterReconcileHelper) getHealthyShardsProcesses() ([]string, [ return hostnames, processNames } +func (r *ShardedClusterReconcileHelper) blockNonEmptyClusterSpecItemRemoval() error { + for _, shardCluster := range r.allShardsMemberClusters { + if !r.sc.Spec.ShardSpec.ClusterSpecItemExists(shardCluster.Name) && shardCluster.Replicas > 0 { + return xerrors.Errorf("Cannot remove shard member cluster %s with non-zero members count. Please scale down members to zero first", shardCluster.Name) + } + } + + for _, configSrvCluster := range r.configSrvMemberClusters { + if !r.sc.Spec.ConfigSrvSpec.ClusterSpecItemExists(configSrvCluster.Name) && configSrvCluster.Replicas > 0 { + return xerrors.Errorf("Cannot remove configSrv member cluster %s with non-zero members count. Please scale down members to zero first", configSrvCluster.Name) + } + } + + for _, mongosCluster := range r.mongosMemberClusters { + if !r.sc.Spec.MongosSpec.ClusterSpecItemExists(mongosCluster.Name) && mongosCluster.Replicas > 0 { + return xerrors.Errorf("Cannot remove mongos member cluster %s with non-zero members count. Please scale down members to zero first", mongosCluster.Name) + } + } + + return nil +} + // checkForMongosDeadlock reports whether the cluster is in a deadlocked state due to mongos waiting on unhealthy // processes (https://jira.mongodb.org/browse/CLOUDP-288588) // We are in a deadlock if: diff --git a/controllers/operator/mongodbshardedcluster_controller_multi_test.go b/controllers/operator/mongodbshardedcluster_controller_multi_test.go index 158665f3b..0f0c7d081 100644 --- a/controllers/operator/mongodbshardedcluster_controller_multi_test.go +++ b/controllers/operator/mongodbshardedcluster_controller_multi_test.go @@ -112,11 +112,11 @@ func ConvertTargetStateToMap(scName string, shardOverridesDistribution []map[str return convertedMap } -type BlockReconcileScalingBothWaysTestCase struct { - name string - initialState MultiClusterShardedScalingStep - targetState MultiClusterShardedScalingStep - expectError bool +type StateChangeTestCase struct { + name string + initialState MultiClusterShardedScalingStep + targetState MultiClusterShardedScalingStep + expectedError string } // TestBlockReconcileScalingBothWays checks that we block reconciliation when member clusters in a replica set need to @@ -125,7 +125,7 @@ func TestBlockReconcileScalingBothWays(t *testing.T) { cluster1 := "member-cluster-1" cluster2 := "member-cluster-2" cluster3 := "member-cluster-3" - testCases := []BlockReconcileScalingBothWaysTestCase{ + testCases := []StateChangeTestCase{ { name: "No scaling", initialState: MultiClusterShardedScalingStep{ @@ -152,7 +152,6 @@ func TestBlockReconcileScalingBothWays(t *testing.T) { cluster1: 1, cluster2: 1, cluster3: 1, }, }, - expectError: false, }, { name: "Scaling in the same direction", @@ -180,7 +179,6 @@ func TestBlockReconcileScalingBothWays(t *testing.T) { cluster1: 1, cluster2: 1, cluster3: 3, }, }, - expectError: false, }, { name: "Scaling both directions: cfg server and 
mongos", @@ -210,7 +208,7 @@ func TestBlockReconcileScalingBothWays(t *testing.T) { cluster1: 1, cluster2: 1, cluster3: 1, }, }, - expectError: true, + expectedError: "Cannot perform scale up and scale down operations at the same time", }, { name: "Scale both ways because of shard override", @@ -247,7 +245,7 @@ func TestBlockReconcileScalingBothWays(t *testing.T) { {cluster1: 1, cluster2: 3, cluster3: 1}, }, }, - expectError: true, + expectedError: "Cannot perform scale up and scale down operations at the same time", }, { // Increasing shardCount creates a new shard, that scales from 0 members. We want to block reconciliation @@ -284,7 +282,7 @@ func TestBlockReconcileScalingBothWays(t *testing.T) { {cluster1: 1, cluster2: 1, cluster3: 1}, }, }, - expectError: true, + expectedError: "Cannot perform scale up and scale down operations at the same time", }, { // We move replicas from one cluster to another, without changing the total number, and we scale shards up @@ -318,18 +316,200 @@ func TestBlockReconcileScalingBothWays(t *testing.T) { {cluster1: 0, cluster2: 2, cluster3: 1}, // Moved two replicas by adding an override, but no scaling }, }, - expectError: true, + expectedError: "Cannot perform scale up and scale down operations at the same time", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + StateChangeTest(t, tc) + }) + } +} + +// TestBlockNonEmptyClusterSpecItemRemoval checks that we block reconciliation when user removes ClusterSpecItem from +// the spec, while that cluster still has non-zero members in the current state +func TestBlockNonEmptyClusterSpecItemRemoval(t *testing.T) { + cluster1 := "member-cluster-1" + cluster2 := "member-cluster-2" + cluster3 := "member-cluster-3" + testCases := []StateChangeTestCase{ + { + name: "Removing zero-member shard ClusterSpecItem doesn't block reconciliation", + initialState: MultiClusterShardedScalingStep{ + shardCount: 3, + configServerDistribution: map[string]int{ + cluster1: 2, cluster2: 1, cluster3: 0, + }, + mongosDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 0, + }, + shardDistribution: map[string]int{ + cluster1: 0, cluster2: 1, cluster3: 2, + }, + }, + targetState: MultiClusterShardedScalingStep{ + shardCount: 3, + configServerDistribution: map[string]int{ + cluster1: 2, cluster2: 1, cluster3: 0, + }, + mongosDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 0, + }, + shardDistribution: map[string]int{ + cluster2: 1, cluster3: 2, + }, + }, + }, + { + name: "Removing zero-member configSrv ClusterSpecItem doesn't block reconciliation", + initialState: MultiClusterShardedScalingStep{ + shardCount: 3, + configServerDistribution: map[string]int{ + cluster1: 2, cluster2: 1, cluster3: 0, + }, + mongosDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 0, + }, + shardDistribution: map[string]int{ + cluster1: 0, cluster2: 1, cluster3: 2, + }, + }, + targetState: MultiClusterShardedScalingStep{ + shardCount: 3, + configServerDistribution: map[string]int{ + cluster1: 2, cluster2: 1, + }, + mongosDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 0, + }, + shardDistribution: map[string]int{ + cluster1: 0, cluster2: 1, cluster3: 2, + }, + }, + }, + { + name: "Removing zero-member mongos ClusterSpecItem doesn't block reconciliation", + initialState: MultiClusterShardedScalingStep{ + shardCount: 3, + configServerDistribution: map[string]int{ + cluster1: 2, cluster2: 1, cluster3: 0, + }, + mongosDistribution: map[string]int{ + cluster1: 1, 
cluster2: 1, cluster3: 0, + }, + shardDistribution: map[string]int{ + cluster1: 0, cluster2: 1, cluster3: 2, + }, + }, + targetState: MultiClusterShardedScalingStep{ + shardCount: 3, + configServerDistribution: map[string]int{ + cluster1: 2, cluster2: 1, cluster3: 0, + }, + mongosDistribution: map[string]int{ + cluster1: 1, cluster2: 1, + }, + shardDistribution: map[string]int{ + cluster1: 0, cluster2: 1, cluster3: 2, + }, + }, + }, + // TODO: this still fails with panic, because the r.allShardsMemberClusters does not contain previous cluster + // This is likely another bug + { + name: "Removing non-zero shard ClusterSpecItem blocks reconciliation", + initialState: MultiClusterShardedScalingStep{ + shardCount: 3, + configServerDistribution: map[string]int{ + cluster1: 2, cluster2: 1, cluster3: 2, + }, + mongosDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 2, + }, + shardDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 3, + }, + }, + targetState: MultiClusterShardedScalingStep{ + shardCount: 3, + configServerDistribution: map[string]int{ + cluster1: 2, cluster2: 1, cluster3: 2, + }, + mongosDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 2, + }, + shardDistribution: map[string]int{ + cluster2: 1, cluster3: 3, + }, + }, + }, + { + name: "Removing non-zero configSrv ClusterSpecItem blocks reconciliation", + initialState: MultiClusterShardedScalingStep{ + shardCount: 3, + configServerDistribution: map[string]int{ + cluster1: 2, cluster2: 1, cluster3: 2, + }, + mongosDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 2, + }, + shardDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 3, + }, + }, + targetState: MultiClusterShardedScalingStep{ + shardCount: 3, + configServerDistribution: map[string]int{ + cluster1: 2, cluster3: 2, + }, + mongosDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 2, + }, + shardDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 3, + }, + }, + expectedError: "Cannot remove configSrv member cluster member-cluster-2 with non-zero members count. Please scale down members to zero first", + }, + { + name: "Removing non-zero mongos ClusterSpecItem blocks reconciliation", + initialState: MultiClusterShardedScalingStep{ + shardCount: 3, + configServerDistribution: map[string]int{ + cluster1: 2, cluster2: 1, cluster3: 2, + }, + mongosDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 2, + }, + shardDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 3, + }, + }, + targetState: MultiClusterShardedScalingStep{ + shardCount: 3, + configServerDistribution: map[string]int{ + cluster1: 2, cluster2: 1, cluster3: 2, + }, + mongosDistribution: map[string]int{ + cluster2: 1, cluster3: 2, + }, + shardDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 3, + }, + }, + expectedError: "Cannot remove mongos member cluster member-cluster-1 with non-zero members count. 
Please scale down members to zero first", }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - BlockReconcileScalingBothWaysCase(t, tc) + StateChangeTest(t, tc) }) } } -func BlockReconcileScalingBothWaysCase(t *testing.T, tc BlockReconcileScalingBothWaysTestCase) { +func StateChangeTest(t *testing.T, tc StateChangeTestCase) { ctx := context.Background() cluster1 := "member-cluster-1" cluster2 := "member-cluster-2" @@ -373,8 +553,8 @@ func BlockReconcileScalingBothWaysCase(t *testing.T, tc BlockReconcileScalingBot require.NoError(t, err) // The validation happens at the beginning of the reconciliation loop. We expect to fail immediately when scaling is // invalid, or stay in pending phase otherwise. - if tc.expectError { - checkReconcileFailed(ctx, t, reconciler, sc, true, "Cannot perform scale up and scale down operations at the same time", kubeClient) + if tc.expectedError != "" { + checkReconcileFailed(ctx, t, reconciler, sc, true, tc.expectedError, kubeClient) } else { checkReconcilePending(ctx, t, reconciler, sc, "StatefulSet not ready", kubeClient, 3) } From cd336204986e50c97e52c7a6ab0811acd815c95d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Kara=C5=9B?= Date: Mon, 6 Oct 2025 11:24:47 +0200 Subject: [PATCH 05/12] Working unit tests --- controllers/operator/mongodbshardedcluster_controller.go | 8 +++++--- .../mongodbshardedcluster_controller_multi_test.go | 3 +++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/controllers/operator/mongodbshardedcluster_controller.go b/controllers/operator/mongodbshardedcluster_controller.go index 6909ebc2d..cc78ab187 100644 --- a/controllers/operator/mongodbshardedcluster_controller.go +++ b/controllers/operator/mongodbshardedcluster_controller.go @@ -2925,9 +2925,11 @@ func (r *ShardedClusterReconcileHelper) getHealthyShardsProcesses() ([]string, [ } func (r *ShardedClusterReconcileHelper) blockNonEmptyClusterSpecItemRemoval() error { - for _, shardCluster := range r.allShardsMemberClusters { - if !r.sc.Spec.ShardSpec.ClusterSpecItemExists(shardCluster.Name) && shardCluster.Replicas > 0 { - return xerrors.Errorf("Cannot remove shard member cluster %s with non-zero members count. Please scale down members to zero first", shardCluster.Name) + for shardIdx, shardClusters := range r.shardsMemberClustersMap { + for _, shardCluster := range shardClusters { + if !r.sc.Spec.ShardSpec.ClusterSpecItemExists(shardCluster.Name) && shardCluster.Replicas > 0 { + return xerrors.Errorf("Cannot remove shard member cluster %s with non-zero members count in shard %d. Please scale down members to zero first", shardCluster.Name, shardIdx) + } } } diff --git a/controllers/operator/mongodbshardedcluster_controller_multi_test.go b/controllers/operator/mongodbshardedcluster_controller_multi_test.go index 0f0c7d081..f8a60a492 100644 --- a/controllers/operator/mongodbshardedcluster_controller_multi_test.go +++ b/controllers/operator/mongodbshardedcluster_controller_multi_test.go @@ -443,6 +443,9 @@ func TestBlockNonEmptyClusterSpecItemRemoval(t *testing.T) { cluster2: 1, cluster3: 3, }, }, + // Full error message `Cannot remove shard member cluster member-cluster-1 with non-zero members count in shard [x]. Please scale down members to zero first`, + // but the shard index can be 0, 1 or 2 depending on map iteration order. Thus, we only check the error substring here. 
+ expectedError: "Cannot remove shard member cluster member-cluster-1 with non-zero members count in shard", }, { name: "Removing non-zero configSrv ClusterSpecItem blocks reconciliation", From 98e7244e2d6c6c86e1ed453ccfac86ec09d80c9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Kara=C5=9B?= Date: Mon, 6 Oct 2025 13:06:39 +0200 Subject: [PATCH 06/12] Remove mc-sharded validations + reconcile blocking loop --- api/v1/mdb/mongodb_validation.go | 2 - api/v1/mdb/sharded_cluster_validation.go | 32 ----------- api/v1/mdb/sharded_cluster_validation_test.go | 54 ------------------- .../mongodbshardedcluster_controller.go | 6 +-- ...odbshardedcluster_controller_multi_test.go | 1 + 5 files changed, 4 insertions(+), 91 deletions(-) diff --git a/api/v1/mdb/mongodb_validation.go b/api/v1/mdb/mongodb_validation.go index fee442ea8..d33d3167c 100644 --- a/api/v1/mdb/mongodb_validation.go +++ b/api/v1/mdb/mongodb_validation.go @@ -472,8 +472,6 @@ func (m *MongoDB) RunValidations(old *MongoDB) []v1.ValidationResult { } } } - - updateValidators = append(updateValidators, ShardedClusterMultiUpdateValidators()...) } else { for _, validator := range ShardedClusterSingleValidators() { res := validator(*m) diff --git a/api/v1/mdb/sharded_cluster_validation.go b/api/v1/mdb/sharded_cluster_validation.go index 682774e9a..2b24a597c 100644 --- a/api/v1/mdb/sharded_cluster_validation.go +++ b/api/v1/mdb/sharded_cluster_validation.go @@ -43,14 +43,6 @@ func ShardedClusterMultiValidators() []func(m MongoDB) []v1.ValidationResult { } } -func ShardedClusterMultiUpdateValidators() []func(newObj, oldObj MongoDbSpec) v1.ValidationResult { - return []func(newObj, oldObj MongoDbSpec) v1.ValidationResult{ - nonEmptyShardClusterSpecItemRemoval, - nonEmptyConfigSrvClusterSpecItemRemoval, - nonEmptyMongosClusterSpecItemRemoval, - } -} - // This applies to any topology func shardCountSpecified(m MongoDB) v1.ValidationResult { if m.Spec.ShardCount == 0 { @@ -382,27 +374,3 @@ func validateMemberClusterIsSubsetOfKubeConfig(m MongoDB) v1.ValidationResult { return v1.ValidationSuccess() } - -func nonEmptyShardClusterSpecItemRemoval(newObj, oldObj MongoDbSpec) v1.ValidationResult { - return nonEmptyClusterSpecItemRemoval("shard", newObj.ShardSpec, oldObj.ShardSpec) -} - -func nonEmptyConfigSrvClusterSpecItemRemoval(newObj, oldObj MongoDbSpec) v1.ValidationResult { - return nonEmptyClusterSpecItemRemoval("configSrv", newObj.ConfigSrvSpec, oldObj.ConfigSrvSpec) -} - -func nonEmptyMongosClusterSpecItemRemoval(newObj, oldObj MongoDbSpec) v1.ValidationResult { - return nonEmptyClusterSpecItemRemoval("mongos", newObj.MongosSpec, oldObj.MongosSpec) -} - -// This validation blocks removing ClusterSpecItem if members count is more than zero (ShardedCluster). If user wants to remove -// the cluster, they should first scale down members to zero and then remove the ClusterSpecItem. -func nonEmptyClusterSpecItemRemoval(kind string, newClusterSpec, oldClusterSpec *ShardedClusterComponentSpec) v1.ValidationResult { - for _, oldClusterSpecItem := range oldClusterSpec.ClusterSpecList { - if !newClusterSpec.ClusterSpecItemExists(oldClusterSpecItem.ClusterName) && oldClusterSpecItem.Members > 0 { - return v1.ValidationError("Cannot remove %s cluster %s with non-zero members count. 
Please scale down members to zero first", kind, oldClusterSpecItem.ClusterName) - } - } - - return v1.ValidationSuccess() -} diff --git a/api/v1/mdb/sharded_cluster_validation_test.go b/api/v1/mdb/sharded_cluster_validation_test.go index 132befc30..6918c4d16 100644 --- a/api/v1/mdb/sharded_cluster_validation_test.go +++ b/api/v1/mdb/sharded_cluster_validation_test.go @@ -668,57 +668,3 @@ users: t.Setenv(multicluster.KubeConfigPathEnv, file.Name()) return file } - -func TestNonEmptyShardClusterSpecItemRemoval(t *testing.T) { - scOld := NewDefaultMultiShardedClusterBuilder().Build() - - scNew := NewDefaultMultiShardedClusterBuilder().Build() - // Removing `test-cluster-0` with 2 members from the ClusterSpecList - scNew.Spec.ShardSpec.ClusterSpecList = ClusterSpecList{ - { - ClusterName: "test-cluster-1", - Members: 3, - }, - } - - _, err := scNew.ValidateUpdate(scOld) - - require.Error(t, err) - assert.Equal(t, "Cannot remove shard cluster test-cluster-0 with non-zero members count. Please scale down members to zero first", err.Error()) -} - -func TestNonEmptyConfigSrcClusterSpecItemRemoval(t *testing.T) { - scOld := NewDefaultMultiShardedClusterBuilder().Build() - - scNew := NewDefaultMultiShardedClusterBuilder().Build() - // Removing `test-cluster-0` with 2 members from the ClusterSpecList - scNew.Spec.ConfigSrvSpec.ClusterSpecList = ClusterSpecList{ - { - ClusterName: "test-cluster-1", - Members: 3, - }, - } - - _, err := scNew.ValidateUpdate(scOld) - - require.Error(t, err) - assert.Equal(t, "Cannot remove configSrv cluster test-cluster-0 with non-zero members count. Please scale down members to zero first", err.Error()) -} - -func TestNonEmptyMongosClusterSpecItemRemoval(t *testing.T) { - scOld := NewDefaultMultiShardedClusterBuilder().Build() - - scNew := NewDefaultMultiShardedClusterBuilder().Build() - // Removing `test-cluster-0` with 2 members from the ClusterSpecList - scNew.Spec.MongosSpec.ClusterSpecList = ClusterSpecList{ - { - ClusterName: "test-cluster-1", - Members: 3, - }, - } - - _, err := scNew.ValidateUpdate(scOld) - - require.Error(t, err) - assert.Equal(t, "Cannot remove mongos cluster test-cluster-0 with non-zero members count. Please scale down members to zero first", err.Error()) -} diff --git a/controllers/operator/mongodbshardedcluster_controller.go b/controllers/operator/mongodbshardedcluster_controller.go index cc78ab187..faf38aeb3 100644 --- a/controllers/operator/mongodbshardedcluster_controller.go +++ b/controllers/operator/mongodbshardedcluster_controller.go @@ -2927,20 +2927,20 @@ func (r *ShardedClusterReconcileHelper) getHealthyShardsProcesses() ([]string, [ func (r *ShardedClusterReconcileHelper) blockNonEmptyClusterSpecItemRemoval() error { for shardIdx, shardClusters := range r.shardsMemberClustersMap { for _, shardCluster := range shardClusters { - if !r.sc.Spec.ShardSpec.ClusterSpecItemExists(shardCluster.Name) && shardCluster.Replicas > 0 { + if !r.desiredShardsConfiguration[shardIdx].ClusterSpecItemExists(shardCluster.Name) && shardCluster.Replicas > 0 { return xerrors.Errorf("Cannot remove shard member cluster %s with non-zero members count in shard %d. 
Please scale down members to zero first", shardCluster.Name, shardIdx) } } } for _, configSrvCluster := range r.configSrvMemberClusters { - if !r.sc.Spec.ConfigSrvSpec.ClusterSpecItemExists(configSrvCluster.Name) && configSrvCluster.Replicas > 0 { + if !r.desiredConfigServerConfiguration.ClusterSpecItemExists(configSrvCluster.Name) && configSrvCluster.Replicas > 0 { return xerrors.Errorf("Cannot remove configSrv member cluster %s with non-zero members count. Please scale down members to zero first", configSrvCluster.Name) } } for _, mongosCluster := range r.mongosMemberClusters { - if !r.sc.Spec.MongosSpec.ClusterSpecItemExists(mongosCluster.Name) && mongosCluster.Replicas > 0 { + if !r.desiredMongosConfiguration.ClusterSpecItemExists(mongosCluster.Name) && mongosCluster.Replicas > 0 { return xerrors.Errorf("Cannot remove mongos member cluster %s with non-zero members count. Please scale down members to zero first", mongosCluster.Name) } } diff --git a/controllers/operator/mongodbshardedcluster_controller_multi_test.go b/controllers/operator/mongodbshardedcluster_controller_multi_test.go index f8a60a492..853992ef9 100644 --- a/controllers/operator/mongodbshardedcluster_controller_multi_test.go +++ b/controllers/operator/mongodbshardedcluster_controller_multi_test.go @@ -447,6 +447,7 @@ func TestBlockNonEmptyClusterSpecItemRemoval(t *testing.T) { // but the shard index can be 0, 1 or 2 depending on map iteration order. Thus, we only check the error substring here. expectedError: "Cannot remove shard member cluster member-cluster-1 with non-zero members count in shard", }, + // TODO: add test case for sharded overrides { name: "Removing non-zero configSrv ClusterSpecItem blocks reconciliation", initialState: MultiClusterShardedScalingStep{ From 11e67b3132b27d2966ed482df37c197c690481a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Kara=C5=9B?= Date: Mon, 6 Oct 2025 15:06:04 +0200 Subject: [PATCH 07/12] Add shard override tests + fix unrelated unit test --- controllers/operator/authentication_test.go | 19 ++-- ...odbshardedcluster_controller_multi_test.go | 98 ++++++++++++++++++- .../mongodbshardedcluster_controller_test.go | 44 ++++----- 3 files changed, 126 insertions(+), 35 deletions(-) diff --git a/controllers/operator/authentication_test.go b/controllers/operator/authentication_test.go index 42959d010..d467b4dac 100644 --- a/controllers/operator/authentication_test.go +++ b/controllers/operator/authentication_test.go @@ -65,7 +65,7 @@ func TestX509ClusterAuthentication_CanBeEnabled_IfX509AuthenticationIsEnabled_Sh ctx := context.Background() scWithTls := test.DefaultClusterBuilder().EnableTLS().EnableX509().SetName("sc-with-tls").SetTLSCA("custom-ca").Build() - reconciler, _, client, _, err := defaultClusterReconciler(ctx, nil, "", "", scWithTls, nil) + reconciler, _, client, _, err := defaultShardedClusterReconciler(ctx, nil, "", "", scWithTls, nil) require.NoError(t, err) addKubernetesTlsResources(ctx, client, scWithTls) @@ -76,7 +76,7 @@ func TestX509CanBeEnabled_WhenThereAreOnlyTlsDeployments_ShardedCluster(t *testi ctx := context.Background() scWithTls := test.DefaultClusterBuilder().EnableTLS().EnableX509().SetName("sc-with-tls").SetTLSCA("custom-ca").Build() - reconciler, _, client, _, err := defaultClusterReconciler(ctx, nil, "", "", scWithTls, nil) + reconciler, _, client, _, err := defaultShardedClusterReconciler(ctx, nil, "", "", scWithTls, nil) require.NoError(t, err) addKubernetesTlsResources(ctx, client, scWithTls) @@ -333,7 +333,7 @@ func 
TestX509InternalClusterAuthentication_CanBeEnabledWithScram_ShardedCluster( EnableX509InternalClusterAuth(). Build() - r, _, kubeClient, omConnectionFactory, _ := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + r, _, kubeClient, omConnectionFactory, _ := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) addKubernetesTlsResources(ctx, r.client, sc) checkReconcileSuccessful(ctx, t, r, sc, kubeClient) @@ -770,15 +770,16 @@ func Test_NoAdditionalDomainsPresent(t *testing.T) { // The default secret we create does not contain additional domains so it will not be valid for this RS rs.Spec.Security.TLSConfig.AdditionalCertificateDomains = []string{"foo"} - reconciler, _, client, _, err := defaultClusterReconciler(ctx, nil, "", "", rs, nil) - require.NoError(t, err) - addKubernetesTlsResources(ctx, client, rs) + kubeClient, omConnectionFactory := mock.NewDefaultFakeClient(rs) + reconciler := newReplicaSetReconciler(ctx, kubeClient, nil, "", "", false, false, omConnectionFactory.GetConnectionFunc) + addKubernetesTlsResources(ctx, kubeClient, rs) - secret := &corev1.Secret{} + certSecret := &corev1.Secret{} - _ = client.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-cert", rs.Name), Namespace: rs.Namespace}, secret) + _ = kubeClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-cert", rs.Name), Namespace: rs.Namespace}, certSecret) - err = certs.VerifyAndEnsureCertificatesForStatefulSet(ctx, reconciler.SecretClient, reconciler.SecretClient, fmt.Sprintf("%s-cert", rs.Name), certs.ReplicaSetConfig(*rs), nil) + err := certs.VerifyAndEnsureCertificatesForStatefulSet(ctx, reconciler.SecretClient, reconciler.SecretClient, fmt.Sprintf("%s-cert", rs.Name), certs.ReplicaSetConfig(*rs), nil) + require.Error(t, err) for i := 0; i < rs.Spec.Members; i++ { expectedErrorMessage := fmt.Sprintf("domain %s-%d.foo is not contained in the list of DNSNames", rs.Name, i) assert.Contains(t, err.Error(), expectedErrorMessage) diff --git a/controllers/operator/mongodbshardedcluster_controller_multi_test.go b/controllers/operator/mongodbshardedcluster_controller_multi_test.go index 853992ef9..eb08b83d8 100644 --- a/controllers/operator/mongodbshardedcluster_controller_multi_test.go +++ b/controllers/operator/mongodbshardedcluster_controller_multi_test.go @@ -415,8 +415,6 @@ func TestBlockNonEmptyClusterSpecItemRemoval(t *testing.T) { }, }, }, - // TODO: this still fails with panic, because the r.allShardsMemberClusters does not contain previous cluster - // This is likely another bug { name: "Removing non-zero shard ClusterSpecItem blocks reconciliation", initialState: MultiClusterShardedScalingStep{ @@ -447,7 +445,99 @@ func TestBlockNonEmptyClusterSpecItemRemoval(t *testing.T) { // but the shard index can be 0, 1 or 2 depending on map iteration order. Thus, we only check the error substring here. 
expectedError: "Cannot remove shard member cluster member-cluster-1 with non-zero members count in shard", }, - // TODO: add test case for sharded overrides + { + name: "Removing non-zero shard ClusterSpecItem from shard override blocks reconciliation", + initialState: MultiClusterShardedScalingStep{ + shardCount: 3, + configServerDistribution: map[string]int{ + cluster1: 2, cluster2: 1, cluster3: 2, + }, + mongosDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 2, + }, + shardDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 3, + }, + }, + targetState: MultiClusterShardedScalingStep{ + shardCount: 3, + configServerDistribution: map[string]int{ + cluster1: 2, cluster2: 1, cluster3: 2, + }, + mongosDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 2, + }, + shardDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 3, + }, + shardOverrides: []map[string]int{ + { + cluster1: 1, cluster2: 1, cluster3: 3, + }, + { + cluster1: 1, cluster2: 1, cluster3: 3, + }, + // Dropping cluster1 from shard 2 + { + cluster2: 1, cluster3: 3, + }, + }, + }, + // Full error message `Cannot remove shard member cluster member-cluster-1 with non-zero members count in shard [x]. Please scale down members to zero first`, + // but the shard index can be 0, 1 or 2 depending on map iteration order. Thus, we only check the error substring here. + expectedError: "Cannot remove shard member cluster member-cluster-1 with non-zero members count in shard 2. Please scale down members to zero first", + }, + { + name: "Removing zero-member shard ClusterSpecItem from shard override doesn't block reconciliation", + initialState: MultiClusterShardedScalingStep{ + shardCount: 3, + configServerDistribution: map[string]int{ + cluster1: 2, cluster2: 1, cluster3: 2, + }, + mongosDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 2, + }, + shardDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 3, + }, + shardOverrides: []map[string]int{ + { + cluster1: 1, cluster2: 2, cluster3: 3, + }, + { + cluster1: 1, cluster2: 0, cluster3: 3, + }, + { + cluster1: 1, cluster2: 2, cluster3: 3, + }, + }, + }, + targetState: MultiClusterShardedScalingStep{ + shardCount: 3, + configServerDistribution: map[string]int{ + cluster1: 2, cluster2: 1, cluster3: 2, + }, + mongosDistribution: map[string]int{ + cluster1: 1, cluster2: 1, cluster3: 2, + }, + // Dropping cluster2 + shardDistribution: map[string]int{ + cluster1: 1, cluster3: 3, + }, + shardOverrides: []map[string]int{ + { + cluster1: 1, cluster2: 2, cluster3: 3, + }, + // Dropping cluster2 from shard 0 + { + cluster1: 1, cluster3: 3, + }, + { + cluster1: 1, cluster2: 2, cluster3: 3, + }, + }, + }, + }, { name: "Removing non-zero configSrv ClusterSpecItem blocks reconciliation", initialState: MultiClusterShardedScalingStep{ @@ -2785,7 +2875,7 @@ func TestComputeMembersToScaleDown(t *testing.T) { _, omConnectionFactory := mock.NewDefaultFakeClient(targetSpec) memberClusterMap := getFakeMultiClusterMapWithClusters(memberClusterNames, omConnectionFactory) - _, reconcileHelper, _, _, err := defaultClusterReconciler(ctx, nil, "", "", targetSpec, memberClusterMap) + _, reconcileHelper, _, _, err := defaultShardedClusterReconciler(ctx, nil, "", "", targetSpec, memberClusterMap) assert.NoError(t, err) membersToScaleDown := reconcileHelper.computeMembersToScaleDown(tc.cfgServerCurrentClusters, tc.shardsCurrentClusters, zap.S()) diff --git a/controllers/operator/mongodbshardedcluster_controller_test.go 
b/controllers/operator/mongodbshardedcluster_controller_test.go index 12ca849b8..d7b2aefe4 100644 --- a/controllers/operator/mongodbshardedcluster_controller_test.go +++ b/controllers/operator/mongodbshardedcluster_controller_test.go @@ -54,7 +54,7 @@ import ( func TestChangingFCVShardedCluster(t *testing.T) { ctx := context.Background() sc := test.DefaultClusterBuilder().Build() - reconciler, _, cl, _, err := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + reconciler, _, cl, _, err := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) require.NoError(t, err) // Helper function to update and verify FCV @@ -76,7 +76,7 @@ func TestReconcileCreateShardedCluster(t *testing.T) { ctx := context.Background() sc := test.DefaultClusterBuilder().Build() - reconciler, _, kubeClient, omConnectionFactory, err := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + reconciler, _, kubeClient, omConnectionFactory, err := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) c := kubeClient require.NoError(t, err) @@ -220,7 +220,7 @@ func TestReconcileCreateShardedCluster_ScaleDown(t *testing.T) { ctx := context.Background() // First creation sc := test.DefaultClusterBuilder().SetShardCountSpec(4).SetShardCountStatus(4).Build() - reconciler, _, clusterClient, omConnectionFactory, err := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + reconciler, _, clusterClient, omConnectionFactory, err := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) require.NoError(t, err) checkReconcileSuccessful(ctx, t, reconciler, sc, clusterClient) @@ -261,7 +261,7 @@ func TestShardedClusterReconcileContainerImages(t *testing.T) { ctx := context.Background() sc := test.DefaultClusterBuilder().SetVersion("8.0.0").SetShardCountSpec(1).Build() - reconciler, _, kubeClient, _, err := defaultClusterReconciler(ctx, imageUrlsMock, "2.0.0", "1.0.0", sc, nil) + reconciler, _, kubeClient, _, err := defaultShardedClusterReconciler(ctx, imageUrlsMock, "2.0.0", "1.0.0", sc, nil) require.NoError(t, err) checkReconcileSuccessful(ctx, t, reconciler, sc, kubeClient) @@ -299,7 +299,7 @@ func TestShardedClusterReconcileContainerImagesWithStaticArchitecture(t *testing databaseRelatedImageEnv: "quay.io/mongodb/mongodb-enterprise-server:@sha256:MONGODB_DATABASE", } - reconciler, _, kubeClient, omConnectionFactory, err := defaultClusterReconciler(ctx, imageUrlsMock, "", "", sc, nil) + reconciler, _, kubeClient, omConnectionFactory, err := defaultShardedClusterReconciler(ctx, imageUrlsMock, "", "", sc, nil) require.NoError(t, err) omConnectionFactory.SetPostCreateHook(func(connection om.Connection) { @@ -339,7 +339,7 @@ func TestReconcilePVCResizeShardedCluster(t *testing.T) { sc.Spec.Persistent = util.BooleanRef(true) sc.Spec.ConfigSrvPodSpec.Persistence = &persistence sc.Spec.ShardPodSpec.Persistence = &persistence - reconciler, _, c, _, err := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + reconciler, _, c, _, err := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) assert.NoError(t, err) // first, we create the shardedCluster with sts and pvc, @@ -500,7 +500,7 @@ func TestAddDeleteShardedCluster(t *testing.T) { // First we need to create a sharded cluster sc := test.DefaultClusterBuilder().Build() - reconciler, _, clusterClient, omConnectionFactory, err := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + reconciler, _, clusterClient, omConnectionFactory, err := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) omConnectionFactory.SetPostCreateHook(func(connection om.Connection) { 
connection.(*om.MockedOmConnection).AgentsDelayCount = 1 }) @@ -649,7 +649,7 @@ func TestConstructConfigSrv(t *testing.T) { func TestPrepareScaleDownShardedCluster_OnlyMongos(t *testing.T) { ctx := context.Background() sc := test.DefaultClusterBuilder().SetMongosCountStatus(4).SetMongosCountSpec(2).Build() - _, reconcileHelper, _, omConnectionFactory, _ := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + _, reconcileHelper, _, omConnectionFactory, _ := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) oldDeployment := createDeploymentFromShardedCluster(t, sc) omConnectionFactory.SetPostCreateHook(func(connection om.Connection) { if _, err := connection.UpdateDeployment(oldDeployment); err != nil { @@ -804,7 +804,7 @@ func TestShardedCluster_WithTLSEnabled_AndX509Enabled_Succeeds(t *testing.T) { SetTLSCA("custom-ca"). Build() - reconciler, _, clusterClient, _, err := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + reconciler, _, clusterClient, _, err := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) require.NoError(t, err) addKubernetesTlsResources(ctx, clusterClient, sc) @@ -823,7 +823,7 @@ func TestShardedCluster_NeedToPublishState(t *testing.T) { Build() // perform successful reconciliation to populate all the stateful sets in the mocked client - reconciler, reconcilerHelper, clusterClient, _, err := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + reconciler, reconcilerHelper, clusterClient, _, err := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) require.NoError(t, err) addKubernetesTlsResources(ctx, clusterClient, sc) actualResult, err := reconciler.Reconcile(ctx, requestFromObject(sc)) @@ -899,7 +899,7 @@ func TestShardedCustomPodSpecTemplate(t *testing.T) { Spec: configSrvPodSpec, }).Build() - reconciler, _, kubeClient, _, err := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + reconciler, _, kubeClient, _, err := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) require.NoError(t, err) addKubernetesTlsResources(ctx, kubeClient, sc) @@ -998,7 +998,7 @@ func TestShardedCustomPodStaticSpecTemplate(t *testing.T) { Spec: configSrvPodSpec, }).Build() - reconciler, _, kubeClient, _, err := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + reconciler, _, kubeClient, _, err := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) require.NoError(t, err) addKubernetesTlsResources(ctx, kubeClient, sc) @@ -1169,7 +1169,7 @@ func TestScalingShardedCluster_ScalesOneMemberAtATime_WhenScalingDown(t *testing SetShardCountStatus(3). Build() - reconciler, _, clusterClient, _, err := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + reconciler, _, clusterClient, _, err := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) require.NoError(t, err) // perform initial reconciliation so we are not creating a new resource checkReconcileSuccessful(ctx, t, reconciler, sc, clusterClient) @@ -1287,7 +1287,7 @@ func TestShardedClusterPortsAreConfigurable_WithAdditionalMongoConfig(t *testing SetShardAdditionalConfig(shardConfig). 
Build() - reconciler, _, clusterClient, _, err := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + reconciler, _, clusterClient, _, err := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) require.NoError(t, err) checkReconcileSuccessful(ctx, t, reconciler, sc, clusterClient) @@ -1317,7 +1317,7 @@ func TestShardedCluster_ConfigMapAndSecretWatched(t *testing.T) { ctx := context.Background() sc := test.DefaultClusterBuilder().Build() - reconciler, _, clusterClient, _, err := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + reconciler, _, clusterClient, _, err := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) require.NoError(t, err) checkReconcileSuccessful(ctx, t, reconciler, sc, clusterClient) @@ -1336,7 +1336,7 @@ func TestShardedClusterTLSAndInternalAuthResourcesWatched(t *testing.T) { ctx := context.Background() sc := test.DefaultClusterBuilder().SetShardCountSpec(1).EnableTLS().SetTLSCA("custom-ca").Build() sc.Spec.Security.Authentication.InternalCluster = "x509" - reconciler, _, clusterClient, _, err := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + reconciler, _, clusterClient, _, err := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) require.NoError(t, err) addKubernetesTlsResources(ctx, clusterClient, sc) @@ -1384,7 +1384,7 @@ func TestBackupConfiguration_ShardedCluster(t *testing.T) { }). Build() - reconciler, _, clusterClient, omConnectionFactory, err := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + reconciler, _, clusterClient, omConnectionFactory, err := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) require.NoError(t, err) omConnectionFactory.SetPostCreateHook(func(c om.Connection) { // 4 because config server + num shards + 1 for entity to represent the sharded cluster itself @@ -1509,7 +1509,7 @@ func TestTlsConfigPrefix_ForShardedCluster(t *testing.T) { }). 
Build() - reconciler, _, clusterClient, _, err := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + reconciler, _, clusterClient, _, err := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) require.NoError(t, err) createShardedClusterTLSSecretsFromCustomCerts(ctx, sc, "my-prefix", clusterClient) @@ -1553,7 +1553,7 @@ func TestShardSpecificPodSpec(t *testing.T) { }, }).Build() - reconciler, _, clusterClient, _, err := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + reconciler, _, clusterClient, _, err := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) require.NoError(t, err) addKubernetesTlsResources(ctx, clusterClient, sc) checkReconcileSuccessful(ctx, t, reconciler, sc, clusterClient) @@ -1577,7 +1577,7 @@ func TestShardedClusterAgentVersionMapping(t *testing.T) { reconcilerFactory := func(sc *mdbv1.MongoDB) (reconcile.Reconciler, kubernetesClient.Client) { // Go couldn't infer correctly that *ReconcileMongoDbShardedCluster implemented *reconciler.Reconciler interface // without this anonymous function - reconciler, _, mockClient, _, err := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + reconciler, _, mockClient, _, err := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) require.NoError(t, err) return reconciler, mockClient } @@ -1679,7 +1679,7 @@ func createDeploymentFromShardedCluster(t *testing.T, updatable v1.CustomResourc return d } -func defaultClusterReconciler(ctx context.Context, imageUrls images.ImageUrls, initDatabaseNonStaticImageVersion, databaseNonStaticImageVersion string, sc *mdbv1.MongoDB, globalMemberClustersMap map[string]client.Client) (*ReconcileMongoDbShardedCluster, *ShardedClusterReconcileHelper, kubernetesClient.Client, *om.CachedOMConnectionFactory, error) { +func defaultShardedClusterReconciler(ctx context.Context, imageUrls images.ImageUrls, initDatabaseNonStaticImageVersion, databaseNonStaticImageVersion string, sc *mdbv1.MongoDB, globalMemberClustersMap map[string]client.Client) (*ReconcileMongoDbShardedCluster, *ShardedClusterReconcileHelper, kubernetesClient.Client, *om.CachedOMConnectionFactory, error) { kubeClient, omConnectionFactory := mock.NewDefaultFakeClient(sc) r, reconcileHelper, err := newShardedClusterReconcilerFromResource(ctx, imageUrls, initDatabaseNonStaticImageVersion, databaseNonStaticImageVersion, sc, globalMemberClustersMap, kubeClient, omConnectionFactory) if err != nil { @@ -1765,7 +1765,7 @@ func SingleClusterShardedScalingWithOverridesTestCase(t *testing.T, tc SingleClu for _, scalingStep := range tc.scalingSteps { t.Run(scalingStep.name, func(t *testing.T) { - reconciler, reconcilerHelper, kubeClient, omConnectionFactory, err := defaultClusterReconciler(ctx, nil, "", "", sc, nil) + reconciler, reconcilerHelper, kubeClient, omConnectionFactory, err := defaultShardedClusterReconciler(ctx, nil, "", "", sc, nil) _ = omConnectionFactory.GetConnectionFunc(&om.OMContext{GroupName: om.TestGroupName}) require.NoError(t, err) clusterMapping := reconcilerHelper.deploymentState.ClusterMapping From ddb9e57a2f3c7579b0754127cd6e62f7166b0630 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Kara=C5=9B?= Date: Mon, 6 Oct 2025 16:37:16 +0200 Subject: [PATCH 08/12] Add changelog + revert shardOverrides fix (other PR) --- ...x_block_removing_non_zero_member_cluster_from.md | 13 +++++++++++++ .../operator/mongodbshardedcluster_controller.go | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 changelog/20251006_fix_block_removing_non_zero_member_cluster_from.md diff --git 
a/changelog/20251006_fix_block_removing_non_zero_member_cluster_from.md b/changelog/20251006_fix_block_removing_non_zero_member_cluster_from.md new file mode 100644 index 000000000..904c093f9 --- /dev/null +++ b/changelog/20251006_fix_block_removing_non_zero_member_cluster_from.md @@ -0,0 +1,13 @@ +--- +kind: fix +date: 2025-10-06 +--- + +* **MultiClusterSharded**: Block removing non-zero member cluster from MongoDB resource. This prevents from scaling down member cluster without current configuration available, which can lead to unexpected issues. Previously operator was crashing in that scenario, after the fix it will mark reconciliation as `Failed` with appropriate message. Example unsafe scenario that is now blocked: + * User has 2 member clusters: `main` is used for application traffic, `read-analytics` is used for read-only analytics + * `main` cluster has 7 voting members + * `read-analytics` cluster has 3 non-voting members + * User decides to remove `read-analytics` cluster, by removing the `clusterSpecItem` completely + * Operator scales down members from `read-analytics` cluster one by one + * Because the configuration does not have voting options specified anymore and by default `priority` is set to 1, the operator will remove one member, but the other two members will be reconfigured as voting members + * `replicaset` contains now 9 voting members, which is not [supported by MongoDB](https://www.mongodb.com/docs/manual/reference/limits/#mongodb-limit-Number-of-Voting-Members-of-a-Replica-Set) diff --git a/controllers/operator/mongodbshardedcluster_controller.go b/controllers/operator/mongodbshardedcluster_controller.go index faf38aeb3..87ac8bc5b 100644 --- a/controllers/operator/mongodbshardedcluster_controller.go +++ b/controllers/operator/mongodbshardedcluster_controller.go @@ -298,6 +298,7 @@ func (r *ShardedClusterReconcileHelper) getShardNameToShardIdxMap() map[string]i // The reconciler implementation should refer to this structure only without taking into consideration complexities of MongoDbSpec wrt sharded clusters. func (r *ShardedClusterReconcileHelper) prepareDesiredShardsConfiguration() map[int]*mdbv1.ShardedClusterComponentSpec { spec := r.sc.Spec.DeepCopy() + spec.ShardSpec.ClusterSpecList = spec.GetShardClusterSpecList() // We don't need to do the same for shardOverrides for single-cluster as shardOverrides[].ClusterSpecList can be set only for Multi-Cluster mode. // And we don't need that artificial legacy cluster as for single-cluster all necessary configuration is defined top-level. 
@@ -311,7 +312,7 @@ func (r *ShardedClusterReconcileHelper) prepareDesiredShardsConfiguration() map[ topLevelPersistenceOverride, topLevelPodSpecOverride := getShardTopLevelOverrides(spec, shardIdx) shardComponentSpec := *spec.ShardSpec.DeepCopy() - shardComponentSpec.ClusterSpecList = processClusterSpecList(spec.GetShardClusterSpecList(), topLevelPodSpecOverride, topLevelPersistenceOverride) + shardComponentSpec.ClusterSpecList = processClusterSpecList(shardComponentSpec.ClusterSpecList, topLevelPodSpecOverride, topLevelPersistenceOverride) shardComponentSpecs[shardIdx] = &shardComponentSpec } From 31c346ee4770c3caabb620f3e7ad3f38ebdc4d3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Kara=C5=9B?= Date: Mon, 6 Oct 2025 17:08:53 +0200 Subject: [PATCH 09/12] fixes --- api/v1/mdb/shardedcluster.go | 1 - .../construct/database_construction.go | 1 - .../mongodbshardedcluster_controller.go | 22 ++++++++++++------- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/api/v1/mdb/shardedcluster.go b/api/v1/mdb/shardedcluster.go index 18b55b9b3..0ff8c0472 100644 --- a/api/v1/mdb/shardedcluster.go +++ b/api/v1/mdb/shardedcluster.go @@ -101,7 +101,6 @@ func (s *ShardedClusterComponentSpec) GetClusterSpecItem(clusterName string) Clu } // it should never occur - we preprocess all clusterSpecLists - // TODO: this can actually happen if desired cluster is removed, but the deployment state is not yet updated and contains the old cluster config panic(fmt.Errorf("clusterName %s not found in clusterSpecList", clusterName)) } diff --git a/controllers/operator/construct/database_construction.go b/controllers/operator/construct/database_construction.go index 546e5a7d1..19fea8c99 100644 --- a/controllers/operator/construct/database_construction.go +++ b/controllers/operator/construct/database_construction.go @@ -216,7 +216,6 @@ func ReplicaSetOptions(additionalOpts ...func(options *DatabaseStatefulSetOption // shardedOptions group the shared logic for creating Shard, Config servers, and mongos options func shardedOptions(cfg shardedOptionCfg, additionalOpts ...func(options *DatabaseStatefulSetOptions)) DatabaseStatefulSetOptions { - // TODO: we should read from r.desiredConfigServerConfiguration.GetClusterSpecItem clusterComponentSpec := cfg.componentSpec.GetClusterSpecItem(cfg.memberClusterName) statefulSetConfiguration := clusterComponentSpec.StatefulSetConfiguration var statefulSetSpecOverride *appsv1.StatefulSetSpec diff --git a/controllers/operator/mongodbshardedcluster_controller.go b/controllers/operator/mongodbshardedcluster_controller.go index 87ac8bc5b..3e66f1786 100644 --- a/controllers/operator/mongodbshardedcluster_controller.go +++ b/controllers/operator/mongodbshardedcluster_controller.go @@ -511,20 +511,24 @@ func extractOverridesFromPodSpec(podSpec *mdbv1.MongoDbPodSpec) (*corev1.PodTemp // We share the same logic and data structures used for Config Server, although some fields are not relevant for mongos // e.g MemberConfig. 
They will simply be ignored when the database is constructed func (r *ShardedClusterReconcileHelper) prepareDesiredMongosConfiguration() *mdbv1.ShardedClusterComponentSpec { - topLevelPodSpecOverride, topLevelPersistenceOverride := extractOverridesFromPodSpec(r.sc.Spec.MongosPodSpec) + spec := r.sc.Spec.DeepCopy() + spec.MongosSpec.ClusterSpecList = spec.GetMongosClusterSpecList() - mongosComponentSpec := r.sc.Spec.MongosSpec.DeepCopy() - mongosComponentSpec.ClusterSpecList = processClusterSpecList(r.sc.Spec.GetMongosClusterSpecList(), topLevelPodSpecOverride, topLevelPersistenceOverride) + topLevelPodSpecOverride, topLevelPersistenceOverride := extractOverridesFromPodSpec(spec.MongosPodSpec) + mongosComponentSpec := spec.MongosSpec.DeepCopy() + mongosComponentSpec.ClusterSpecList = processClusterSpecList(mongosComponentSpec.ClusterSpecList, topLevelPodSpecOverride, topLevelPersistenceOverride) return mongosComponentSpec } // prepareDesiredConfigServerConfiguration works the same way as prepareDesiredMongosConfiguration, but for config server func (r *ShardedClusterReconcileHelper) prepareDesiredConfigServerConfiguration() *mdbv1.ShardedClusterComponentSpec { - topLevelPodSpecOverride, topLevelPersistenceOverride := extractOverridesFromPodSpec(r.sc.Spec.ConfigSrvPodSpec) + spec := r.sc.Spec.DeepCopy() + spec.ConfigSrvSpec.ClusterSpecList = spec.GetConfigSrvClusterSpecList() - configSrvComponentSpec := r.sc.Spec.ConfigSrvSpec.DeepCopy() - configSrvComponentSpec.ClusterSpecList = processClusterSpecList(r.sc.Spec.GetConfigSrvClusterSpecList(), topLevelPodSpecOverride, topLevelPersistenceOverride) + topLevelPodSpecOverride, topLevelPersistenceOverride := extractOverridesFromPodSpec(spec.ConfigSrvPodSpec) + configSrvComponentSpec := spec.ConfigSrvSpec.DeepCopy() + configSrvComponentSpec.ClusterSpecList = processClusterSpecList(configSrvComponentSpec.ClusterSpecList, topLevelPodSpecOverride, topLevelPersistenceOverride) return configSrvComponentSpec } @@ -833,7 +837,10 @@ func (r *ShardedClusterReconcileHelper) Reconcile(ctx context.Context, log *zap. return r.updateStatus(ctx, sc, workflow.Failed(err), log) } - // TODO: add comment why this was added + // We cannot allow removing a cluster specification if the cluster is not scaled down to zero. + // For example: we have 3 members in a cluster, and we try to remove the entire cluster spec. The operator scales members down one by one. + // It could remove one member successfully, but then recreate the remaining members with the default configuration rather than the one that was used before. + // Removing the cluster spec would drop all non-default cluster configuration (e.g. priority, persistence), which can lead to unexpected issues. if err := r.blockNonEmptyClusterSpecItemRemoval(); err != nil { return r.updateStatus(ctx, sc, workflow.Failed(err), log) } @@ -912,7 +919,6 @@ func (r *ShardedClusterReconcileHelper) Reconcile(ctx context.Context, log *zap. 
if sc.Spec.ShardCount < r.deploymentState.Status.ShardCount { r.removeUnusedStatefulsets(ctx, sc, log) } - // TODO: we should also remove unused configSrv and mongos statefulsets annotationsToAdd, err := getAnnotationsForResource(sc) if err != nil { From 883949a6b1b197e02ac5dc02c4a9b241f7740f2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Kara=C5=9B?= Date: Tue, 7 Oct 2025 10:38:32 +0200 Subject: [PATCH 10/12] test fixes + added link to Jira ticket --- api/v1/om/appdb_types.go | 5 +++-- .../tests/shardedcluster/sharded_cluster.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/api/v1/om/appdb_types.go b/api/v1/om/appdb_types.go index 2c4b28ebb..b8626b89e 100644 --- a/api/v1/om/appdb_types.go +++ b/api/v1/om/appdb_types.go @@ -494,9 +494,10 @@ func (m *AppDBSpec) GetMemberClusterSpecByName(memberClusterName string) mdbv1.C // In case the member cluster is not found in the cluster spec list, we return an empty ClusterSpecItem // with 0 members to handle the case of removing a cluster from the spec list without a panic. - // TODO: this is not ideal, because we don't consider other fields that were removed i.e.MemberConfig. + // + // This is not ideal, because we don't consider other fields that were removed i.e.MemberConfig. // When scaling down we should consider the full spec that was used to create the cluster. - // Create a ticket + // https://jira.mongodb.org/browse/CLOUDP-349925 return mdbv1.ClusterSpecItem{ ClusterName: memberClusterName, Members: 0, diff --git a/docker/mongodb-kubernetes-tests/tests/shardedcluster/sharded_cluster.py b/docker/mongodb-kubernetes-tests/tests/shardedcluster/sharded_cluster.py index 8ef232656..650bc9275 100644 --- a/docker/mongodb-kubernetes-tests/tests/shardedcluster/sharded_cluster.py +++ b/docker/mongodb-kubernetes-tests/tests/shardedcluster/sharded_cluster.py @@ -34,7 +34,7 @@ def sc(namespace: str, custom_mdb_version: str) -> MongoDB: resource=resource, shard_members_array=[1, 1, 1], mongos_members_array=[1, 1, None], - configsrv_members_array=[3, 1, 0], + configsrv_members_array=[1, 1, 1], ) return resource From ee6c193177337ceff38f9e830dcb87a67f0db53f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Kara=C5=9B?= Date: Wed, 8 Oct 2025 09:45:42 +0200 Subject: [PATCH 11/12] Update changelog --- ...06_fix_block_removing_non_zero_member_cluster_from.md | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/changelog/20251006_fix_block_removing_non_zero_member_cluster_from.md b/changelog/20251006_fix_block_removing_non_zero_member_cluster_from.md index 904c093f9..0ec31dff4 100644 --- a/changelog/20251006_fix_block_removing_non_zero_member_cluster_from.md +++ b/changelog/20251006_fix_block_removing_non_zero_member_cluster_from.md @@ -3,11 +3,4 @@ kind: fix date: 2025-10-06 --- -* **MultiClusterSharded**: Block removing non-zero member cluster from MongoDB resource. This prevents from scaling down member cluster without current configuration available, which can lead to unexpected issues. Previously operator was crashing in that scenario, after the fix it will mark reconciliation as `Failed` with appropriate message. 
Example unsafe scenario that is now blocked: - * User has 2 member clusters: `main` is used for application traffic, `read-analytics` is used for read-only analytics - * `main` cluster has 7 voting members - * `read-analytics` cluster has 3 non-voting members - * User decides to remove `read-analytics` cluster, by removing the `clusterSpecItem` completely - * Operator scales down members from `read-analytics` cluster one by one - * Because the configuration does not have voting options specified anymore and by default `priority` is set to 1, the operator will remove one member, but the other two members will be reconfigured as voting members - * `replicaset` contains now 9 voting members, which is not [supported by MongoDB](https://www.mongodb.com/docs/manual/reference/limits/#mongodb-limit-Number-of-Voting-Members-of-a-Replica-Set) +* **MultiClusterSharded**: Block removing non-zero member cluster from MongoDB resource. This prevents scaling down a member cluster without its current configuration available, which could lead to unexpected issues. From 37602d2e618337c5a31e34581a1dd198a4c45e8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Kara=C5=9B?= <6159874+MaciejKaras@users.noreply.github.com> Date: Wed, 8 Oct 2025 16:32:09 +0200 Subject: [PATCH 12/12] Update changelog/20251006_fix_block_removing_non_zero_member_cluster_from.md Co-authored-by: Vivek Singh --- .../20251006_fix_block_removing_non_zero_member_cluster_from.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog/20251006_fix_block_removing_non_zero_member_cluster_from.md b/changelog/20251006_fix_block_removing_non_zero_member_cluster_from.md index 0ec31dff4..8137816c3 100644 --- a/changelog/20251006_fix_block_removing_non_zero_member_cluster_from.md +++ b/changelog/20251006_fix_block_removing_non_zero_member_cluster_from.md @@ -3,4 +3,4 @@ kind: fix date: 2025-10-06 --- -* **MultiClusterSharded**: Block removing non-zero member cluster from MongoDB resource. This prevents scaling down a member cluster without its current configuration available, which could lead to unexpected issues. +* **MultiClusterSharded**: Blocked removing non-zero member cluster from MongoDB resource. This prevents scaling down a member cluster without its current configuration available, which could lead to unexpected issues.
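As a minimal illustration of the guard this series introduces (the `blockNonEmptyClusterSpecItemRemoval` check in the controller diff above), the sketch below rejects a desired spec that drops a member cluster while that cluster still has running members. It uses simplified, hypothetical types (`memberCluster`, `clusterSpecItem`) and a standalone `main` driver for demonstration only; it is not the operator's actual API.

```go
package main

import "fmt"

// memberCluster is a simplified stand-in for a member cluster that is already
// deployed: its name and how many replicas currently run there.
type memberCluster struct {
	Name     string
	Replicas int
}

// clusterSpecItem is a simplified stand-in for a desired cluster entry in the spec.
type clusterSpecItem struct {
	ClusterName string
	Members     int
}

// specItemExists reports whether the desired spec still references the given cluster name.
func specItemExists(spec []clusterSpecItem, name string) bool {
	for _, item := range spec {
		if item.ClusterName == name {
			return true
		}
	}
	return false
}

// blockNonEmptyRemoval rejects a desired spec that drops a deployed cluster
// while that cluster still has a non-zero number of members.
func blockNonEmptyRemoval(desired []clusterSpecItem, deployed []memberCluster) error {
	for _, mc := range deployed {
		if !specItemExists(desired, mc.Name) && mc.Replicas > 0 {
			return fmt.Errorf("cannot remove member cluster %s with non-zero members count; scale it down to zero first", mc.Name)
		}
	}
	return nil
}

func main() {
	deployed := []memberCluster{
		{Name: "main", Replicas: 7},
		{Name: "read-analytics", Replicas: 3},
	}
	// The desired spec drops "read-analytics" while it still runs 3 members,
	// so the guard returns an error and reconciliation would be marked Failed.
	desired := []clusterSpecItem{{ClusterName: "main", Members: 7}}
	if err := blockNonEmptyRemoval(desired, deployed); err != nil {
		fmt.Println(err)
	}
}
```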