From 1d8847e7bb1af22e36d685d1b30fb20476a4bf39 Mon Sep 17 00:00:00 2001
From: Julien Benhaim
Date: Thu, 2 Oct 2025 15:03:09 +0200
Subject: [PATCH 1/8] Scale up + disable TLS in a single update

---
 .../tests/tls/e2e_tls_disable_and_scale_up.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/docker/mongodb-kubernetes-tests/tests/tls/e2e_tls_disable_and_scale_up.py b/docker/mongodb-kubernetes-tests/tests/tls/e2e_tls_disable_and_scale_up.py
index 3eae37813..7f73823d3 100644
--- a/docker/mongodb-kubernetes-tests/tests/tls/e2e_tls_disable_and_scale_up.py
+++ b/docker/mongodb-kubernetes-tests/tests/tls/e2e_tls_disable_and_scale_up.py
@@ -38,18 +38,11 @@ def test_rs_is_running(replica_set: MongoDB):
 def test_tls_is_disabled_and_scaled_up(replica_set: MongoDB):
     replica_set.load()
     replica_set["spec"]["members"] = 5
-
-    replica_set.update()
-
-
-@pytest.mark.e2e_disable_tls_scale_up
-def test_tls_is_disabled_and_scaled_up(replica_set: MongoDB):
-    replica_set.load()
     replica_set["spec"]["security"]["tls"]["enabled"] = False
     del replica_set["spec"]["additionalMongodConfig"]
     replica_set.update()
     # timeout is longer because the operator first needs to
-    # disable TLS and then, scale down one by one.
+    # disable TLS and then, scale up one by one.
     replica_set.assert_reaches_phase(Phase.Running, timeout=800)
From 2d3e5406c10d1d74c8987cb977ac143de8a498a6 Mon Sep 17 00:00:00 2001
From: Julien Benhaim
Date: Thu, 2 Oct 2025 16:50:11 +0200
Subject: [PATCH 2/8] Tentative fix

---
 controllers/operator/mongodbreplicaset_controller.go | 21 ++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/controllers/operator/mongodbreplicaset_controller.go b/controllers/operator/mongodbreplicaset_controller.go
index 470e56716..2a7fdd7da 100644
--- a/controllers/operator/mongodbreplicaset_controller.go
+++ b/controllers/operator/mongodbreplicaset_controller.go
@@ -234,6 +234,25 @@ func (r *ReconcileMongoDbReplicaSet) Reconcile(ctx context.Context, request reco
     shouldMirrorKeyfile := r.applySearchOverrides(ctx, rs, log)

     sts := construct.DatabaseStatefulSet(*rs, rsConfig, log)
+
+    // If TLS is being disabled, we need to lock the StatefulSet replicas at the current member count
+    // to prevent scaling during the TLS disable operation.
+    shouldLockMembers := false
+    omDeployment, err := conn.ReadDeployment()
+    if err != nil {
+        return r.updateStatus(ctx, rs, workflow.Failed(xerrors.Errorf("Failed to read deployment for TLS configuration check: %w", err)), log)
+    }
+    if omDeployment.TLSConfigurationWillBeDisabled(rs.Spec.GetSecurity()) {
+        shouldLockMembers = true
+        log.Infof("TLS is being disabled, locking StatefulSet replicas for this reconciliation at current member count: %d", rs.Status.Members)
+    }
+
+    if shouldLockMembers {
+        // Override the StatefulSet replicas to match current members, preventing scale up/down during TLS disable
+        replicas := int32(rs.Status.Members)
+        sts.Spec.Replicas = &replicas
+    }
+
     if status := r.ensureRoles(ctx, rs.Spec.DbCommonSpec, r.enableClusterMongoDBRoles, conn, kube.ObjectKeyFromApiObject(rs), log); !status.IsOK() {
         return r.updateStatus(ctx, rs, status, log)
     }
@@ -459,7 +478,7 @@ func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, c
         return workflow.Failed(err)
     }

-    // If current operation is to Disable TLS, then we should the current members of the Replica Set,
+    // If current operation is to Disable TLS, then we should lock the current members of the Replica Set,
     // this is, do not scale them up or down util TLS disabling has completed.
     shouldLockMembers, err := updateOmDeploymentDisableTLSConfiguration(conn, r.imageUrls[mcoConstruct.MongodbImageEnv], r.forceEnterprise, membersNumberBefore, rs, set, log, caFilePath, tlsCertPath)
     if err != nil && !isRecovering {
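The core of this first attempt is a read-before-write guard: ask Ops Manager whether the pending update would turn TLS off, and if so pin the StatefulSet at its current size. The decision reduces to the following self-contained sketch; the stub `deployment` type and boolean TLS field stand in for the operator's `om.Deployment` and security spec, and are not the real API:

```go
package main

import "fmt"

// deployment is a stub for om.Deployment; only the TLS flag matters here.
type deployment struct{ tlsEnabled bool }

// tlsConfigurationWillBeDisabled mirrors the check used in the patch:
// the automation config still has TLS on, but the spec wants it off.
func (d deployment) tlsConfigurationWillBeDisabled(specWantsTLS bool) bool {
	return d.tlsEnabled && !specWantsTLS
}

// replicasForThisReconciliation pins the replica count while TLS is being disabled.
func replicasForThisReconciliation(d deployment, specWantsTLS bool, currentMembers, desiredMembers int32) int32 {
	if d.tlsConfigurationWillBeDisabled(specWantsTLS) {
		return currentMembers // no scale up/down until TLS is off on every process
	}
	return desiredMembers
}

func main() {
	d := deployment{tlsEnabled: true}
	fmt.Println(replicasForThisReconciliation(d, false, 3, 5)) // 3: locked
	fmt.Println(replicasForThisReconciliation(d, true, 3, 5))  // 5: normal scaling
}
```

As the later patches show, pinning only the StatefulSet is not enough on its own: the OM automation config and the agent wait must agree on the same number.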
From dce404602d686ff19790c55f1448e4ba0d6ae2d4 Mon Sep 17 00:00:00 2001
From: Julien Benhaim
Date: Thu, 2 Oct 2025 17:25:17 +0200
Subject: [PATCH 3/8] Try fix again

---
 .../operator/mongodbreplicaset_controller.go  | 42 ++++++++++---------
 .../tests/tls/e2e_tls_disable_and_scale_up.py |  5 ++-
 2 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/controllers/operator/mongodbreplicaset_controller.go b/controllers/operator/mongodbreplicaset_controller.go
index 2a7fdd7da..db6eb2c82 100644
--- a/controllers/operator/mongodbreplicaset_controller.go
+++ b/controllers/operator/mongodbreplicaset_controller.go
@@ -234,25 +234,6 @@ func (r *ReconcileMongoDbReplicaSet) Reconcile(ctx context.Context, request reco
     shouldMirrorKeyfile := r.applySearchOverrides(ctx, rs, log)

     sts := construct.DatabaseStatefulSet(*rs, rsConfig, log)
-
-    // If TLS is being disabled, we need to lock the StatefulSet replicas at the current member count
-    // to prevent scaling during the TLS disable operation.
-    shouldLockMembers := false
-    omDeployment, err := conn.ReadDeployment()
-    if err != nil {
-        return r.updateStatus(ctx, rs, workflow.Failed(xerrors.Errorf("Failed to read deployment for TLS configuration check: %w", err)), log)
-    }
-    if omDeployment.TLSConfigurationWillBeDisabled(rs.Spec.GetSecurity()) {
-        shouldLockMembers = true
-        log.Infof("TLS is being disabled, locking StatefulSet replicas for this reconciliation at current member count: %d", rs.Status.Members)
-    }
-
-    if shouldLockMembers {
-        // Override the StatefulSet replicas to match current members, preventing scale up/down during TLS disable
-        replicas := int32(rs.Status.Members)
-        sts.Spec.Replicas = &replicas
-    }
-
     if status := r.ensureRoles(ctx, rs.Spec.DbCommonSpec, r.enableClusterMongoDBRoles, conn, kube.ObjectKeyFromApiObject(rs), log); !status.IsOK() {
         return r.updateStatus(ctx, rs, status, log)
     }
@@ -263,6 +244,19 @@ func (r *ReconcileMongoDbReplicaSet) Reconcile(ctx context.Context, request reco
         }
     }

+    // Check if TLS is being disabled. If so, lock the StatefulSet replicas at the current member count
+    // to prevent scaling during the TLS disable operation.
+    // This must be done before passing the StatefulSet to OM reconciliation.
+    tlsWillBeDisabled, err := checkIfTLSWillBeDisabled(conn, rs, log)
+    if err != nil {
+        return r.updateStatus(ctx, rs, workflow.Failed(xerrors.Errorf("Failed to check TLS configuration: %w", err)), log)
+    }
+    if tlsWillBeDisabled {
+        log.Infof("TLS is being disabled, locking StatefulSet replicas at current member count: %d", rs.Status.Members)
+        replicas := int32(rs.Status.Members)
+        sts.Spec.Replicas = &replicas
+    }
+
     internalClusterCertPath := ""
     if internalClusterCertHash != "" {
         internalClusterCertPath = fmt.Sprintf("%s%s", util.InternalClusterAuthMountPath, internalClusterCertHash)
@@ -594,6 +588,16 @@ func updateOmDeploymentDisableTLSConfiguration(conn om.Connection, mongoDBImage
     return tlsConfigWasDisabled, err
 }

+// checkIfTLSWillBeDisabled checks if TLS configuration will be disabled in the next OM update
+// without modifying the deployment. This is used to determine if we should lock StatefulSet replicas.
+func checkIfTLSWillBeDisabled(conn om.Connection, rs *mdbv1.MongoDB, log *zap.SugaredLogger) (bool, error) {
+    deployment, err := conn.ReadDeployment()
+    if err != nil {
+        return false, err
+    }
+    return deployment.TLSConfigurationWillBeDisabled(rs.Spec.GetSecurity()), nil
+}
+
 func (r *ReconcileMongoDbReplicaSet) OnDelete(ctx context.Context, obj runtime.Object, log *zap.SugaredLogger) error {
     rs := obj.(*mdbv1.MongoDB)

diff --git a/docker/mongodb-kubernetes-tests/tests/tls/e2e_tls_disable_and_scale_up.py b/docker/mongodb-kubernetes-tests/tests/tls/e2e_tls_disable_and_scale_up.py
index 7f73823d3..a2e4b15b1 100644
--- a/docker/mongodb-kubernetes-tests/tests/tls/e2e_tls_disable_and_scale_up.py
+++ b/docker/mongodb-kubernetes-tests/tests/tls/e2e_tls_disable_and_scale_up.py
@@ -1,4 +1,6 @@
 import pytest
+
+from kubetester import try_load
 from kubetester.certs import ISSUER_CA_NAME, create_mongodb_tls_certs
 from kubetester.kubetester import fixture as load_fixture
 from kubetester.mongodb import MongoDB
@@ -20,7 +22,8 @@ def replica_set(namespace: str, server_certs: str, issuer_ca_configmap: str) ->
     # Set this ReplicaSet to allowSSL mode
     # this is the only mode that can go to "disabled" state.
     res["spec"]["additionalMongodConfig"] = {"net": {"ssl": {"mode": "allowSSL"}}}
-
+    if try_load(res):
+        return res
     return res.create()
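`checkIfTLSWillBeDisabled` separates detection (a plain `ReadDeployment`) from mutation (`ReadUpdateDeployment`), so the reconciler can size the StatefulSet before anything is written to Ops Manager. A minimal sketch of the same split over an in-memory stand-in; the `fakeConn` type and its boolean TLS field are illustrative, not the operator's `om.Connection` API:

```go
package main

import "fmt"

// fakeConn is an in-memory stand-in for om.Connection.
type fakeConn struct{ tlsEnabled bool }

// ReadDeployment is the read-only half: safe to call just to make a decision.
func (c *fakeConn) ReadDeployment() (bool, error) { return c.tlsEnabled, nil }

// ReadUpdateDeployment is the mutating half: only invoked once the decision is made.
func (c *fakeConn) ReadUpdateDeployment(mutate func(tlsEnabled *bool) error) error {
	return mutate(&c.tlsEnabled)
}

// checkIfTLSWillBeDisabled mirrors the new helper: probe without modifying.
func checkIfTLSWillBeDisabled(c *fakeConn, specWantsTLS bool) (bool, error) {
	tlsOn, err := c.ReadDeployment()
	if err != nil {
		return false, err
	}
	return tlsOn && !specWantsTLS, nil
}

func main() {
	conn := &fakeConn{tlsEnabled: true}
	willDisable, _ := checkIfTLSWillBeDisabled(conn, false)
	fmt.Println(willDisable) // true: the next OM update would turn TLS off
}
```

The `try_load` change in the test fixture is the usual idempotency pattern: on a re-run the existing resource is loaded instead of re-created.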
From cd569e1d4551c3eb66ac2ea9e5a83f2b92117645 Mon Sep 17 00:00:00 2001
From: Julien Benhaim
Date: Thu, 2 Oct 2025 18:57:27 +0200
Subject: [PATCH 4/8] Effective replicas fix

---
 .../operator/mongodbreplicaset_controller.go  | 74 +++++++++----------
 .../tests/tls/e2e_tls_disable_and_scale_up.py |  1 -
 2 files changed, 37 insertions(+), 38 deletions(-)

diff --git a/controllers/operator/mongodbreplicaset_controller.go b/controllers/operator/mongodbreplicaset_controller.go
index db6eb2c82..2face2845 100644
--- a/controllers/operator/mongodbreplicaset_controller.go
+++ b/controllers/operator/mongodbreplicaset_controller.go
@@ -244,19 +244,28 @@ func (r *ReconcileMongoDbReplicaSet) Reconcile(ctx context.Context, request reco
         }
     }

-    // Check if TLS is being disabled. If so, lock the StatefulSet replicas at the current member count
-    // to prevent scaling during the TLS disable operation.
-    // This must be done before passing the StatefulSet to OM reconciliation.
+    // Check if TLS is being disabled. If so, we need to lock replicas at the current member count
+    // to prevent scaling during the TLS disable operation. This decision is made once here and
+    // applied to both the StatefulSet and OM automation config.
     tlsWillBeDisabled, err := checkIfTLSWillBeDisabled(conn, rs, log)
     if err != nil {
         return r.updateStatus(ctx, rs, workflow.Failed(xerrors.Errorf("Failed to check TLS configuration: %w", err)), log)
     }
+
+    // Compute effective replicas for this reconciliation
+    var effectiveReplicas int32
     if tlsWillBeDisabled {
-        log.Infof("TLS is being disabled, locking StatefulSet replicas at current member count: %d", rs.Status.Members)
-        replicas := int32(rs.Status.Members)
-        sts.Spec.Replicas = &replicas
+        // Lock at current member count during TLS disable
+        effectiveReplicas = int32(rs.Status.Members)
+        log.Infof("TLS is being disabled, locking replicas at current member count: %d", rs.Status.Members)
+    } else {
+        // Normal scaling logic
+        effectiveReplicas = int32(scale.ReplicasThisReconciliation(rs))
     }

+    // Apply effective replicas to StatefulSet
+    sts.Spec.Replicas = &effectiveReplicas
+
     internalClusterCertPath := ""
     if internalClusterCertHash != "" {
         internalClusterCertPath = fmt.Sprintf("%s%s", util.InternalClusterAuthMountPath, internalClusterCertHash)
@@ -272,7 +281,7 @@ func (r *ReconcileMongoDbReplicaSet) Reconcile(ctx context.Context, request reco
     // See CLOUDP-189433 and CLOUDP-229222 for more details.
     if recovery.ShouldTriggerRecovery(rs.Status.Phase != mdbstatus.PhaseRunning, rs.Status.LastTransition) {
         log.Warnf("Triggering Automatic Recovery. The MongoDB resource %s/%s is in %s state since %s", rs.Namespace, rs.Name, rs.Status.Phase, rs.Status.LastTransition)
-        automationConfigStatus := r.updateOmDeploymentRs(ctx, conn, rs.Status.Members, rs, sts, log, agentCertPath, caFilePath, tlsCertPath, internalClusterCertPath, prometheusCertHash, true, shouldMirrorKeyfile).OnErrorPrepend("Failed to create/update (Ops Manager reconciliation phase):")
+        automationConfigStatus := r.updateOmDeploymentRs(ctx, conn, rs.Status.Members, rs, &sts, log, agentCertPath, caFilePath, tlsCertPath, internalClusterCertPath, prometheusCertHash, true, shouldMirrorKeyfile).OnErrorPrepend("Failed to create/update (Ops Manager reconciliation phase):")
         deploymentError := create.DatabaseInKubernetes(ctx, r.client, *rs, sts, rsConfig, log)
         if deploymentError != nil {
             log.Errorf("Recovery failed because of deployment errors, %w", deploymentError)
@@ -288,7 +297,7 @@ func (r *ReconcileMongoDbReplicaSet) Reconcile(ctx context.Context, request reco
     }

     status = workflow.RunInGivenOrder(publishAutomationConfigFirst(ctx, r.client, *rs, lastSpec, rsConfig, log),
         func() workflow.Status {
-            return r.updateOmDeploymentRs(ctx, conn, rs.Status.Members, rs, sts, log, agentCertPath, caFilePath, tlsCertPath, internalClusterCertPath, prometheusCertHash, false, shouldMirrorKeyfile).OnErrorPrepend("Failed to create/update (Ops Manager reconciliation phase):")
+            return r.updateOmDeploymentRs(ctx, conn, rs.Status.Members, rs, &sts, log, agentCertPath, caFilePath, tlsCertPath, internalClusterCertPath, prometheusCertHash, false, shouldMirrorKeyfile).OnErrorPrepend("Failed to create/update (Ops Manager reconciliation phase):")
         }, func() workflow.Status {
             workflowStatus := create.HandlePVCResize(ctx, r.client, &sts, log)
@@ -462,33 +471,28 @@ func AddReplicaSetController(ctx context.Context, mgr manager.Manager, imageUrls

 // updateOmDeploymentRs performs OM registration operation for the replicaset. So the changes will be finally propagated
 // to automation agents in containers
-func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, conn om.Connection, membersNumberBefore int, rs *mdbv1.MongoDB, set appsv1.StatefulSet, log *zap.SugaredLogger, agentCertPath, caFilePath, tlsCertPath, internalClusterCertPath string, prometheusCertHash string, isRecovering bool, shouldMirrorKeyfileForMongot bool) workflow.Status {
+func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, conn om.Connection, membersNumberBefore int, rs *mdbv1.MongoDB, set *appsv1.StatefulSet, log *zap.SugaredLogger, agentCertPath, caFilePath, tlsCertPath, internalClusterCertPath string, prometheusCertHash string, isRecovering bool, shouldMirrorKeyfileForMongot bool) workflow.Status {
     log.Debug("Entering UpdateOMDeployments")
     // Only "concrete" RS members should be observed
     // - if scaling down, let's observe only members that will remain after scale-down operation
     // - if scaling up, observe only current members, because new ones might not exist yet
-    err := agents.WaitForRsAgentsToRegister(set, util_int.Min(membersNumberBefore, int(*set.Spec.Replicas)), rs.Spec.GetClusterDomain(), conn, log, rs)
+    // The effective replica count has already been computed in the caller (Reconcile)
+    // and applied to set.Spec.Replicas. We use that value directly here.
+    effectiveReplicas := int(*set.Spec.Replicas)
+
+    err := agents.WaitForRsAgentsToRegister(*set, util_int.Min(membersNumberBefore, effectiveReplicas), rs.Spec.GetClusterDomain(), conn, log, rs)
     if err != nil && !isRecovering {
         return workflow.Failed(err)
     }

-    // If current operation is to Disable TLS, then we should lock the current members of the Replica Set,
-    // this is, do not scale them up or down util TLS disabling has completed.
-    shouldLockMembers, err := updateOmDeploymentDisableTLSConfiguration(conn, r.imageUrls[mcoConstruct.MongodbImageEnv], r.forceEnterprise, membersNumberBefore, rs, set, log, caFilePath, tlsCertPath)
+    // If TLS is being disabled, update the OM automation config to disable it.
+    // The replica count is already locked via set.Spec.Replicas.
+    err = updateOmDeploymentDisableTLSConfiguration(conn, r.imageUrls[mcoConstruct.MongodbImageEnv], r.forceEnterprise, effectiveReplicas, rs, *set, log, caFilePath, tlsCertPath)
     if err != nil && !isRecovering {
         return workflow.Failed(err)
     }

-    var updatedMembers int
-    if shouldLockMembers {
-        // We should not add or remove members during this run, we'll wait for
-        // TLS to be completely disabled first.
-        updatedMembers = membersNumberBefore
-    } else {
-        updatedMembers = int(*set.Spec.Replicas)
-    }
-
-    replicaSet := replicaset.BuildFromStatefulSetWithReplicas(r.imageUrls[mcoConstruct.MongodbImageEnv], r.forceEnterprise, set, rs.GetSpec(), updatedMembers, rs.CalculateFeatureCompatibilityVersion(), tlsCertPath)
+    replicaSet := replicaset.BuildFromStatefulSetWithReplicas(r.imageUrls[mcoConstruct.MongodbImageEnv], r.forceEnterprise, *set, rs.GetSpec(), effectiveReplicas, rs.CalculateFeatureCompatibilityVersion(), tlsCertPath)
     processNames := replicaSet.GetProcessNames()

     status, additionalReconciliationRequired := r.updateOmAuthentication(ctx, conn, processNames, rs, agentCertPath, caFilePath, internalClusterCertPath, isRecovering, log)
@@ -539,8 +543,8 @@ func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, c
     }

     externalDomain := rs.Spec.DbCommonSpec.GetExternalDomain()
-    hostsBefore := getAllHostsRs(set, rs.Spec.GetClusterDomain(), membersNumberBefore, externalDomain)
-    hostsAfter := getAllHostsRs(set, rs.Spec.GetClusterDomain(), scale.ReplicasThisReconciliation(rs), externalDomain)
+    hostsBefore := getAllHostsRs(*set, rs.Spec.GetClusterDomain(), membersNumberBefore, externalDomain)
+    hostsAfter := getAllHostsRs(*set, rs.Spec.GetClusterDomain(), scale.ReplicasThisReconciliation(rs), externalDomain)

     if err := host.CalculateDiffAndStopMonitoring(conn, hostsBefore, hostsAfter, log); err != nil && !isRecovering {
         return workflow.Failed(err)
     }
@@ -555,23 +559,21 @@ func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, c
 }

 // updateOmDeploymentDisableTLSConfiguration checks if TLS configuration needs
-// to be disabled. In which case it will disable it and inform to the calling
-// function.
-func updateOmDeploymentDisableTLSConfiguration(conn om.Connection, mongoDBImage string, forceEnterprise bool, membersNumberBefore int, rs *mdbv1.MongoDB, set appsv1.StatefulSet, log *zap.SugaredLogger, caFilePath, tlsCertPath string) (bool, error) {
-    tlsConfigWasDisabled := false
-
-    err := conn.ReadUpdateDeployment(
+// to be disabled, and if so, updates the OM automation config to disable it.
+// The effectiveReplicas parameter contains the already-computed replica count to use.
+func updateOmDeploymentDisableTLSConfiguration(conn om.Connection, mongoDBImage string, forceEnterprise bool, effectiveReplicas int, rs *mdbv1.MongoDB, set appsv1.StatefulSet, log *zap.SugaredLogger, caFilePath, tlsCertPath string) error {
+    return conn.ReadUpdateDeployment(
         func(d om.Deployment) error {
             if !d.TLSConfigurationWillBeDisabled(rs.Spec.GetSecurity()) {
                 return nil
             }

-            tlsConfigWasDisabled = true
+            log.Debug("Disabling TLS in OM automation config")
             d.ConfigureTLS(rs.Spec.GetSecurity(), caFilePath)

-            // configure as many agents/Pods as we currently have, no more (in case
-            // there's a scale up change at the same time).
-            replicaSet := replicaset.BuildFromStatefulSetWithReplicas(mongoDBImage, forceEnterprise, set, rs.GetSpec(), membersNumberBefore, rs.CalculateFeatureCompatibilityVersion(), tlsCertPath)
+            // Use the effective replica count that was already computed in the caller.
+            // This ensures we use the same locked value during TLS disable.
+            replicaSet := replicaset.BuildFromStatefulSetWithReplicas(mongoDBImage, forceEnterprise, set, rs.GetSpec(), effectiveReplicas, rs.CalculateFeatureCompatibilityVersion(), tlsCertPath)

             lastConfig, err := rs.GetLastAdditionalMongodConfigByType(mdbv1.ReplicaSetConfig)
             if err != nil {
@@ -584,8 +586,6 @@ func updateOmDeploymentDisableTLSConfiguration(conn om.Connection, mongoDBImage
         },
         log,
     )
-
-    return tlsConfigWasDisabled, err
 }

diff --git a/docker/mongodb-kubernetes-tests/tests/tls/e2e_tls_disable_and_scale_up.py b/docker/mongodb-kubernetes-tests/tests/tls/e2e_tls_disable_and_scale_up.py
index a2e4b15b1..f708341a4 100644
--- a/docker/mongodb-kubernetes-tests/tests/tls/e2e_tls_disable_and_scale_up.py
+++ b/docker/mongodb-kubernetes-tests/tests/tls/e2e_tls_disable_and_scale_up.py
@@ -1,5 +1,4 @@
 import pytest
-
 from kubetester import try_load
 from kubetester.certs import ISSUER_CA_NAME, create_mongodb_tls_certs
 from kubetester.kubetester import fixture as load_fixture
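Patch 4's theme is single-source-of-truth sizing: one `effectiveReplicas` value is computed in `Reconcile` and reused for the StatefulSet, the agent wait, and the automation config, instead of each call site re-deriving its own count. Passing `*appsv1.StatefulSet` into `updateOmDeploymentRs` is what lets the pinned `Spec.Replicas` flow through unchanged. A condensed, runnable sketch of the decision (function and parameter names here are illustrative, not the operator's API):

```go
package main

import "fmt"

// effectiveReplicas is the single sizing decision: freeze at the current member
// count while TLS is being disabled, otherwise follow normal incremental scaling.
func effectiveReplicas(tlsWillBeDisabled bool, statusMembers, thisReconciliation int32) int32 {
	if tlsWillBeDisabled {
		return statusMembers
	}
	return thisReconciliation
}

func main() {
	// Scaling 3 -> 5 while turning TLS off: the first reconciliation stays at 3
	// (TLS is disabled on the existing members), and once TLS is off the
	// follow-up reconciliations scale up one member at a time.
	fmt.Println(effectiveReplicas(true, 3, 4))  // 3: locked during TLS disable
	fmt.Println(effectiveReplicas(false, 3, 4)) // 4: next member added
}
```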
From 29faeffc7c6663d86dc51c37ee6792bc22ef92e7 Mon Sep 17 00:00:00 2001
From: Julien Benhaim
Date: Fri, 3 Oct 2025 12:23:51 +0200
Subject: [PATCH 5/8] Only wait for processes with pods

---
 controllers/operator/mongodbreplicaset_controller.go | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/controllers/operator/mongodbreplicaset_controller.go b/controllers/operator/mongodbreplicaset_controller.go
index 2face2845..dbc2d61ca 100644
--- a/controllers/operator/mongodbreplicaset_controller.go
+++ b/controllers/operator/mongodbreplicaset_controller.go
@@ -479,7 +479,7 @@ func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, c
     // The effective replica count has already been computed in the caller (Reconcile)
     // and applied to set.Spec.Replicas. We use that value directly here.
     effectiveReplicas := int(*set.Spec.Replicas)
-
+
     err := agents.WaitForRsAgentsToRegister(*set, util_int.Min(membersNumberBefore, effectiveReplicas), rs.Spec.GetClusterDomain(), conn, log, rs)
     if err != nil && !isRecovering {
         return workflow.Failed(err)
     }
@@ -529,7 +529,12 @@ func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, c
         return workflow.Failed(err)
     }

-    if err := om.WaitForReadyState(conn, processNames, isRecovering, log); err != nil {
+    // Only wait for the processes that currently have pods. During scale-up, we add all desired
+    // processes to the automation config, but we only wait for the ones that exist now. The
+    // StatefulSet will be updated after this, creating new pods incrementally.
+    numProcessesToWait := util_int.Min(membersNumberBefore, effectiveReplicas)
+    processesToWait := processNames[:numProcessesToWait]
+    if err := om.WaitForReadyState(conn, processesToWait, isRecovering, log); err != nil {
         return workflow.Failed(err)
     }
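With all desired processes already published to the automation config, waiting on the full `processNames` list would block on agents whose pods do not exist yet. Slicing the list to `min(membersNumberBefore, effectiveReplicas)` relies on process names being ordered by pod ordinal, which is how the replica set is built from the StatefulSet. A small sketch under that ordering assumption (requires Go 1.21+ for the built-in `min`):

```go
package main

import "fmt"

// processesToWaitFor keeps only the processes that can have a running pod right
// now: during scale-up that is the pre-existing members, during scale-down the
// members that will remain. Assumes processNames is ordered by pod ordinal.
func processesToWaitFor(processNames []string, membersBefore, effective int) []string {
	return processNames[:min(membersBefore, effective)]
}

func main() {
	names := []string{"rs-0", "rs-1", "rs-2", "rs-3", "rs-4"}
	fmt.Println(processesToWaitFor(names, 3, 5)) // [rs-0 rs-1 rs-2]: scale-up 3 -> 5
	fmt.Println(processesToWaitFor(names, 5, 3)) // [rs-0 rs-1 rs-2]: scale-down 5 -> 3
}
```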
From e12bd07bb0e4eca8b6ba66e22731efca2082973f Mon Sep 17 00:00:00 2001
From: Julien Benhaim
Date: Fri, 3 Oct 2025 12:24:22 +0200
Subject: [PATCH 6/8] Update test for TLS disabled

---
 .../mongodbreplicaset_controller_test.go | 24 ++++++++++++-------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/controllers/operator/mongodbreplicaset_controller_test.go b/controllers/operator/mongodbreplicaset_controller_test.go
index e85e7050d..8bf9713ca 100644
--- a/controllers/operator/mongodbreplicaset_controller_test.go
+++ b/controllers/operator/mongodbreplicaset_controller_test.go
@@ -400,24 +400,32 @@ func TestUpdateDeploymentTLSConfiguration(t *testing.T) {
     stsNoTLS := construct.DatabaseStatefulSet(*rsNoTLS, construct.ReplicaSetOptions(construct.GetPodEnvOptions()), zap.S())

     // TLS Disabled -> TLS Disabled
-    shouldLockMembers, err := updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentNoTLS), "fake-mongoDBImage", false, 3, rsNoTLS, stsNoTLS, zap.S(), util.CAFilePathInContainer, "")
+    willDisable, err := checkIfTLSWillBeDisabled(om.NewMockedOmConnection(deploymentNoTLS), rsNoTLS, zap.S())
+    assert.NoError(t, err)
+    assert.False(t, willDisable)
+    err = updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentNoTLS), "fake-mongoDBImage", false, 3, rsNoTLS, stsNoTLS, zap.S(), util.CAFilePathInContainer, "")
     assert.NoError(t, err)
-    assert.False(t, shouldLockMembers)

     // TLS Disabled -> TLS Enabled
-    shouldLockMembers, err = updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentNoTLS), "fake-mongoDBImage", false, 3, rsWithTLS, stsWithTLS, zap.S(), util.CAFilePathInContainer, "")
+    willDisable, err = checkIfTLSWillBeDisabled(om.NewMockedOmConnection(deploymentNoTLS), rsWithTLS, zap.S())
+    assert.NoError(t, err)
+    assert.False(t, willDisable)
+    err = updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentNoTLS), "fake-mongoDBImage", false, 3, rsWithTLS, stsWithTLS, zap.S(), util.CAFilePathInContainer, "")
     assert.NoError(t, err)
-    assert.False(t, shouldLockMembers)

     // TLS Enabled -> TLS Enabled
-    shouldLockMembers, err = updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentWithTLS), "fake-mongoDBImage", false, 3, rsWithTLS, stsWithTLS, zap.S(), util.CAFilePathInContainer, "")
+    willDisable, err = checkIfTLSWillBeDisabled(om.NewMockedOmConnection(deploymentWithTLS), rsWithTLS, zap.S())
+    assert.NoError(t, err)
+    assert.False(t, willDisable)
+    err = updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentWithTLS), "fake-mongoDBImage", false, 3, rsWithTLS, stsWithTLS, zap.S(), util.CAFilePathInContainer, "")
     assert.NoError(t, err)
-    assert.False(t, shouldLockMembers)

     // TLS Enabled -> TLS Disabled
-    shouldLockMembers, err = updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentWithTLS), "fake-mongoDBImage", false, 3, rsNoTLS, stsNoTLS, zap.S(), util.CAFilePathInContainer, "")
+    willDisable, err = checkIfTLSWillBeDisabled(om.NewMockedOmConnection(deploymentWithTLS), rsNoTLS, zap.S())
+    assert.NoError(t, err)
+    assert.True(t, willDisable)
+    err = updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentWithTLS), "fake-mongoDBImage", false, 3, rsNoTLS, stsNoTLS, zap.S(), util.CAFilePathInContainer, "")
     assert.NoError(t, err)
-    assert.True(t, shouldLockMembers)
 }

 // TestCreateDeleteReplicaSet checks that no state is left in OpsManager on removal of the replicaset

From 52bfba16c621620ed137be4f97a122dd25268cb2 Mon Sep 17 00:00:00 2001
From: Julien Benhaim
Date: Fri, 3 Oct 2025 13:01:00 +0200
Subject: [PATCH 7/8] deepcopy update

---
 api/v1/search/zz_generated.deepcopy.go | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/api/v1/search/zz_generated.deepcopy.go b/api/v1/search/zz_generated.deepcopy.go
index c66322146..d18d025f8 100644
--- a/api/v1/search/zz_generated.deepcopy.go
+++ b/api/v1/search/zz_generated.deepcopy.go
@@ -159,7 +159,7 @@ func (in *MongoDBSearchSpec) DeepCopyInto(out *MongoDBSearchSpec) {
         *out = new(v1.ResourceRequirements)
         (*in).DeepCopyInto(*out)
     }
-    out.Security = in.Security
+    in.Security.DeepCopyInto(&out.Security)
 }

 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MongoDBSearchSpec.
@@ -231,7 +231,11 @@ func (in *MongoDBSource) DeepCopy() *MongoDBSource {
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *Security) DeepCopyInto(out *Security) {
     *out = *in
-    out.TLS = in.TLS
+    if in.TLS != nil {
+        in, out := &in.TLS, &out.TLS
+        *out = new(TLS)
+        **out = **in
+    }
 }

 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Security.
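The deepcopy regeneration follows from `Security.TLS` becoming a pointer field (visible in the new `if in.TLS != nil` branch): a plain assignment would copy the pointer, leaving the copy and the original sharing one `TLS` struct. The generated pattern reallocates instead. A standalone illustration with minimal stand-in types (the real generated file has more fields):

```go
package main

import "fmt"

type TLS struct{ Enabled bool }

type Security struct{ TLS *TLS }

// DeepCopyInto uses the same shape as the regenerated code: *out = *in copies
// the pointer (still aliased), then a non-nil pointer field gets a fresh allocation.
func (in *Security) DeepCopyInto(out *Security) {
	*out = *in
	if in.TLS != nil {
		out.TLS = new(TLS)
		*out.TLS = *in.TLS
	}
}

func main() {
	orig := Security{TLS: &TLS{Enabled: true}}
	var cp Security
	orig.DeepCopyInto(&cp)
	cp.TLS.Enabled = false        // mutate the copy...
	fmt.Println(orig.TLS.Enabled) // true: ...the original is unaffected
}
```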
From 20f6845f69221921562171d8971820dd0313e329 Mon Sep 17 00:00:00 2001
From: Julien Benhaim
Date: Fri, 3 Oct 2025 13:01:07 +0200
Subject: [PATCH 8/8] Edge case with 0 members

---
 controllers/operator/mongodbreplicaset_controller.go | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/controllers/operator/mongodbreplicaset_controller.go b/controllers/operator/mongodbreplicaset_controller.go
index dbc2d61ca..c894c7cc5 100644
--- a/controllers/operator/mongodbreplicaset_controller.go
+++ b/controllers/operator/mongodbreplicaset_controller.go
@@ -254,8 +254,9 @@ func (r *ReconcileMongoDbReplicaSet) Reconcile(ctx context.Context, request reco
     // Compute effective replicas for this reconciliation
     var effectiveReplicas int32
-    if tlsWillBeDisabled {
-        // Lock at current member count during TLS disable
+    if tlsWillBeDisabled && rs.Status.Members > 0 {
+        // Lock at current member count during TLS disable, but only if there are existing members.
+        // For initial creation (Members == 0), use normal scaling logic.
         effectiveReplicas = int32(rs.Status.Members)
         log.Infof("TLS is being disabled, locking replicas at current member count: %d", rs.Status.Members)
     } else {
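The final guard handles bootstrap: on a brand-new resource `status.members` is 0, so locking there would pin the StatefulSet at zero replicas and the deployment could never come up. In sketch form, the decision after all eight patches reads as follows (names are illustrative, matching the earlier sketches rather than the operator's exact code):

```go
package main

import "fmt"

// effectiveReplicas, final form: lock only when TLS is being disabled AND the
// replica set already has members; otherwise (including first creation, where
// statusMembers == 0) follow normal incremental scaling.
func effectiveReplicas(tlsWillBeDisabled bool, statusMembers, thisReconciliation int32) int32 {
	if tlsWillBeDisabled && statusMembers > 0 {
		return statusMembers
	}
	return thisReconciliation
}

func main() {
	fmt.Println(effectiveReplicas(true, 0, 3))  // 3: fresh resource, never pin at 0
	fmt.Println(effectiveReplicas(true, 4, 5))  // 4: existing members stay locked
	fmt.Println(effectiveReplicas(false, 4, 5)) // 5: normal scale-up continues
}
```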