Skip to content

Commit 073032b

Browse files
committed
Edge case TLS disabled and rs scaled
1 parent eb78918 commit 073032b

File tree

2 files changed

+22
-15
lines changed

2 files changed

+22
-15
lines changed

controllers/operator/mongodbreplicaset_controller.go

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -576,7 +576,7 @@ func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, c
576576
caFilePath := fmt.Sprintf("%s/ca-pem", util.TLSCaMountPath)
577577
// If the current operation is to disable TLS, then we should lock the current members of the Replica Set,
578578
// that is, do not scale them up or down until TLS disabling has completed.
579-
shouldLockMembers, err := updateOmDeploymentDisableTLSConfiguration(conn, r.imageUrls[mcoConstruct.MongodbImageEnv], r.forceEnterprise, membersNumberBefore, rs, log, caFilePath, tlsCertPath)
579+
shouldLockMembers, err := updateOmDeploymentDisableTLSConfiguration(conn, r.imageUrls[mcoConstruct.MongodbImageEnv], r.forceEnterprise, replicasTarget, rs, log, caFilePath, tlsCertPath)
580580
if err != nil && !isRecovering {
581581
return workflow.Failed(err)
582582
}
@@ -585,7 +585,12 @@ func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, c
585585
if shouldLockMembers {
586586
// We should not add or remove members during this run, we'll wait for
587587
// TLS to be completely disabled first.
588-
updatedMembers = membersNumberBefore
588+
// However, on first reconciliation when membersNumberBefore=0, we need to use replicasTarget
589+
if membersNumberBefore == 0 {
590+
updatedMembers = replicasTarget
591+
} else {
592+
updatedMembers = membersNumberBefore
593+
}
589594
} else {
590595
updatedMembers = replicasTarget
591596
}
@@ -657,10 +662,18 @@ func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, c
657662
return workflow.OK()
658663
}
659664

660-
// updateOmDeploymentDisableTLSConfiguration checks if TLS configuration needs
661-
// to be disabled. In which case it will disable it and inform to the calling
662-
// function.
663-
func updateOmDeploymentDisableTLSConfiguration(conn om.Connection, mongoDBImage string, forceEnterprise bool, membersNumberBefore int, rs *mdbv1.MongoDB, log *zap.SugaredLogger, caFilePath, tlsCertPath string) (bool, error) {
665+
// updateOmDeploymentDisableTLSConfiguration handles the edge case where TLS is disabled while
666+
// simultaneously scaling the replica set. Without this safeguard, automation agents could fail during the
667+
// transition, or new pods might join with inconsistent TLS configuration.
668+
//
669+
// This function implements a two-phase reconciliation pattern:
670+
// 1. First reconciliation: Disable TLS on existing members (returns shouldLockMembers=true to prevent scaling)
671+
// 2. Second reconciliation: Once TLS is fully disabled, allow scaling operations to proceed
672+
//
673+
// Related ticket: CLOUDP-80768 (March 2021)
674+
// See also: e2e_tls_disable_and_scale_up.py test
675+
// See also: e2e_tls_disable_and_scale_down.py test
676+
func updateOmDeploymentDisableTLSConfiguration(conn om.Connection, mongoDBImage string, forceEnterprise bool, currentMemberCount int, rs *mdbv1.MongoDB, log *zap.SugaredLogger, caFilePath, tlsCertPath string) (bool, error) {
664677
tlsConfigWasDisabled := false
665678

666679
err := conn.ReadUpdateDeployment(
@@ -674,7 +687,7 @@ func updateOmDeploymentDisableTLSConfiguration(conn om.Connection, mongoDBImage
674687

675688
// configure as many agents/Pods as we currently have, no more (in case
676689
// there's a scale up change at the same time).
677-
replicaSet := replicaset.BuildFromMongoDBWithReplicas(mongoDBImage, forceEnterprise, rs, membersNumberBefore, rs.CalculateFeatureCompatibilityVersion(), tlsCertPath)
690+
replicaSet := replicaset.BuildFromMongoDBWithReplicas(mongoDBImage, forceEnterprise, rs, currentMemberCount, rs.CalculateFeatureCompatibilityVersion(), tlsCertPath)
678691

679692
lastConfig, err := rs.GetLastAdditionalMongodConfigByType(mdbv1.ReplicaSetConfig)
680693
if err != nil {

docker/mongodb-kubernetes-tests/tests/tls/e2e_tls_disable_and_scale_up.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,19 +37,13 @@ def test_rs_is_running(replica_set: MongoDB):
3737
@pytest.mark.e2e_disable_tls_scale_up
3838
def test_tls_is_disabled_and_scaled_up(replica_set: MongoDB):
3939
replica_set.load()
40+
# Parameters must be changed at the same time
4041
replica_set["spec"]["members"] = 5
41-
42-
replica_set.update()
43-
44-
45-
@pytest.mark.e2e_disable_tls_scale_up
46-
def test_tls_is_disabled_and_scaled_up(replica_set: MongoDB):
47-
replica_set.load()
4842
replica_set["spec"]["security"]["tls"]["enabled"] = False
4943
del replica_set["spec"]["additionalMongodConfig"]
5044

5145
replica_set.update()
5246

5347
# timeout is longer because the operator first needs to
54-
# disable TLS and then, scale down one by one.
48+
# disable TLS on existing members, then scale up one by one.
5549
replica_set.assert_reaches_phase(Phase.Running, timeout=800)

0 commit comments

Comments
 (0)