diff --git a/api/v1/mdb/mongodb_types.go b/api/v1/mdb/mongodb_types.go
index 91765cc81..6653203c2 100644
--- a/api/v1/mdb/mongodb_types.go
+++ b/api/v1/mdb/mongodb_types.go
@@ -1037,7 +1037,7 @@ func (a *Authentication) IsOIDCEnabled() bool {
 	return stringutil.Contains(a.GetModes(), util.OIDC)
 }
 
-// GetModes returns the modes of the Authentication instance of an empty
+// GetModes returns the modes of the Authentication instance, or an empty
 // list if it is nil
 func (a *Authentication) GetModes() []string {
 	if a == nil {
diff --git a/controllers/om/process/om_process.go b/controllers/om/process/om_process.go
index 20dc293ec..87dfb9df9 100644
--- a/controllers/om/process/om_process.go
+++ b/controllers/om/process/om_process.go
@@ -22,6 +22,18 @@ func CreateMongodProcessesWithLimit(mongoDBImage string, forceEnterprise bool, s
 	return processes
 }
 
+// CreateMongodProcessesFromMongoDB creates mongod processes directly from the MongoDB resource, without a StatefulSet
+func CreateMongodProcessesFromMongoDB(mongoDBImage string, forceEnterprise bool, mdb *mdbv1.MongoDB, limit int, fcv string, tlsCertPath string) []om.Process {
+	hostnames, names := dns.GetDNSNames(mdb.Name, mdb.ServiceName(), mdb.Namespace, mdb.Spec.GetClusterDomain(), limit, mdb.Spec.DbCommonSpec.GetExternalDomain())
+	processes := make([]om.Process, len(hostnames))
+
+	for idx, hostname := range hostnames {
+		processes[idx] = om.NewMongodProcess(names[idx], hostname, mongoDBImage, forceEnterprise, mdb.Spec.GetAdditionalMongodConfig(), &mdb.Spec, tlsCertPath, mdb.Annotations, fcv)
+	}
+
+	return processes
+}
+
 // CreateMongodProcessesWithLimitMulti creates the process array for automationConfig based on MultiCluster CR spec
 func CreateMongodProcessesWithLimitMulti(mongoDBImage string, forceEnterprise bool, mrs mdbmultiv1.MongoDBMultiCluster, certFileName string) ([]om.Process, error) {
 	hostnames := make([]string, 0)
diff --git a/controllers/om/process/om_process_test.go b/controllers/om/process/om_process_test.go
new file mode 100644
index 000000000..818cfd0cd
--- /dev/null
+++ b/controllers/om/process/om_process_test.go
@@ -0,0 +1,230 @@
+package process
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	mdbv1 "github.com/mongodb/mongodb-kubernetes/api/v1/mdb"
+	"github.com/mongodb/mongodb-kubernetes/pkg/util/maputil"
+)
+
+const (
+	defaultMongoDBImage = "mongodb/mongodb-enterprise-server:7.0.0"
+	defaultFCV          = "7.0"
+	defaultNamespace    = "test-namespace"
+)
+
+func TestCreateMongodProcessesFromMongoDB(t *testing.T) {
+	tests := []struct {
+		name              string
+		mdb               *mdbv1.MongoDB
+		limit             int
+		mongoDBImage      string
+		forceEnterprise   bool
+		fcv               string
+		tlsCertPath       string
+		expectedCount     int
+		expectedHostnames []string
+		expectedNames     []string
+	}{
+		{
+			name:            "3-member replica set",
+			mdb:             baseReplicaSet("test-rs", 3),
+			limit:           3,
+			mongoDBImage:    defaultMongoDBImage,
+			forceEnterprise: false,
+			fcv:             defaultFCV,
+			expectedCount:   3,
+			expectedHostnames: []string{
+				"test-rs-0.test-rs-svc.test-namespace.svc.cluster.local",
+				"test-rs-1.test-rs-svc.test-namespace.svc.cluster.local",
+				"test-rs-2.test-rs-svc.test-namespace.svc.cluster.local",
+			},
+			expectedNames: []string{"test-rs-0", "test-rs-1", "test-rs-2"},
+		},
+		{
+			name:            "Single member replica set",
+			mdb:             baseReplicaSet("single-rs", 1),
+			limit:           1,
+			mongoDBImage:    defaultMongoDBImage,
+			forceEnterprise: false,
+			fcv:             defaultFCV,
+			expectedCount:   1,
+			expectedHostnames: []string{
+				"single-rs-0.single-rs-svc.test-namespace.svc.cluster.local",
+			},
+			expectedNames: []string{"single-rs-0"},
[]string{"single-rs-0"}, + }, + { + name: "Limit less than members (scale up in progress)", + mdb: baseReplicaSet("scale-up-rs", 5), + limit: 3, + mongoDBImage: defaultMongoDBImage, + forceEnterprise: false, + fcv: defaultFCV, + expectedCount: 3, + expectedHostnames: []string{ + "scale-up-rs-0.scale-up-rs-svc.test-namespace.svc.cluster.local", + "scale-up-rs-1.scale-up-rs-svc.test-namespace.svc.cluster.local", + "scale-up-rs-2.scale-up-rs-svc.test-namespace.svc.cluster.local", + }, + expectedNames: []string{"scale-up-rs-0", "scale-up-rs-1", "scale-up-rs-2"}, + }, + { + name: "Limit greater than members (scale down in progress)", + mdb: baseReplicaSet("scale-down-rs", 3), + limit: 5, + mongoDBImage: defaultMongoDBImage, + forceEnterprise: false, + fcv: defaultFCV, + expectedCount: 5, + expectedHostnames: []string{ + "scale-down-rs-0.scale-down-rs-svc.test-namespace.svc.cluster.local", + "scale-down-rs-1.scale-down-rs-svc.test-namespace.svc.cluster.local", + "scale-down-rs-2.scale-down-rs-svc.test-namespace.svc.cluster.local", + "scale-down-rs-3.scale-down-rs-svc.test-namespace.svc.cluster.local", + "scale-down-rs-4.scale-down-rs-svc.test-namespace.svc.cluster.local", + }, + expectedNames: []string{"scale-down-rs-0", "scale-down-rs-1", "scale-down-rs-2", "scale-down-rs-3", "scale-down-rs-4"}, + }, + { + name: "Limit zero creates empty slice", + mdb: baseReplicaSet("empty-rs", 3), + limit: 0, + mongoDBImage: defaultMongoDBImage, + forceEnterprise: false, + fcv: defaultFCV, + expectedCount: 0, + expectedHostnames: []string{}, + expectedNames: []string{}, + }, + { + name: "Custom cluster domain", + mdb: func() *mdbv1.MongoDB { + rs := baseReplicaSet("custom-domain-rs", 2) + rs.Spec.ClusterDomain = "my-cluster.local" + return rs + }(), + + limit: 2, + mongoDBImage: defaultMongoDBImage, + forceEnterprise: false, + fcv: defaultFCV, + expectedCount: 2, + expectedHostnames: []string{ + "custom-domain-rs-0.custom-domain-rs-svc.test-namespace.svc.my-cluster.local", + "custom-domain-rs-1.custom-domain-rs-svc.test-namespace.svc.my-cluster.local", + }, + expectedNames: []string{"custom-domain-rs-0", "custom-domain-rs-1"}, + }, + { + name: "Different namespace", + mdb: func() *mdbv1.MongoDB { + rs := baseReplicaSet("other-ns-rs", 2) + rs.Namespace = "production" + return rs + }(), + limit: 2, + mongoDBImage: defaultMongoDBImage, + forceEnterprise: false, + fcv: defaultFCV, + expectedCount: 2, + expectedHostnames: []string{ + "other-ns-rs-0.other-ns-rs-svc.production.svc.cluster.local", + "other-ns-rs-1.other-ns-rs-svc.production.svc.cluster.local", + }, + expectedNames: []string{"other-ns-rs-0", "other-ns-rs-1"}, + }, + { + name: "With TLS cert path", + mdb: func() *mdbv1.MongoDB { + rs := baseReplicaSet("tls-rs", 2) + rs.Spec.Security = &mdbv1.Security{ + TLSConfig: &mdbv1.TLSConfig{Enabled: true}, + } + return rs + }(), + limit: 2, + mongoDBImage: defaultMongoDBImage, + forceEnterprise: false, + fcv: defaultFCV, + tlsCertPath: "/path/to/cert.pem", + expectedCount: 2, + expectedHostnames: []string{ + "tls-rs-0.tls-rs-svc.test-namespace.svc.cluster.local", + "tls-rs-1.tls-rs-svc.test-namespace.svc.cluster.local", + }, + expectedNames: []string{"tls-rs-0", "tls-rs-1"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + processes := CreateMongodProcessesFromMongoDB( + tt.mongoDBImage, + tt.forceEnterprise, + tt.mdb, + tt.limit, + tt.fcv, + tt.tlsCertPath, + ) + + assert.Equal(t, tt.expectedCount, len(processes), "Process count mismatch") + + for i, process := range processes { + 
+				assert.Equal(t, tt.expectedNames[i], process.Name(), "Process name mismatch at index %d", i)
+				assert.Equal(t, tt.expectedHostnames[i], process.HostName(), "Hostname mismatch at index %d", i)
+				assert.Equal(t, tt.fcv, process.FeatureCompatibilityVersion(), "FCV mismatch at index %d", i)
+
+				if tt.tlsCertPath != "" {
+					tlsConfig := process.TLSConfig()
+					assert.NotNil(t, tlsConfig, "TLS config should not be nil when cert path is provided")
+					assert.Equal(t, tt.tlsCertPath, tlsConfig["certificateKeyFile"], "TLS cert path mismatch at index %d", i)
+				}
+			}
+		})
+	}
+}
+
+func TestCreateMongodProcessesFromMongoDB_AdditionalConfig(t *testing.T) {
+	config := mdbv1.NewAdditionalMongodConfig("storage.engine", "inMemory").
+		AddOption("replication.oplogSizeMB", 2048)
+
+	mdb := mdbv1.NewReplicaSetBuilder().
+		SetName("config-rs").
+		SetNamespace(defaultNamespace).
+		SetMembers(2).
+		SetVersion("7.0.0").
+		SetFCVersion(defaultFCV).
+		SetAdditionalConfig(config).
+		Build()
+
+	processes := CreateMongodProcessesFromMongoDB(
+		defaultMongoDBImage,
+		false,
+		mdb,
+		2,
+		defaultFCV,
+		"",
+	)
+
+	assert.Len(t, processes, 2)
+
+	for i, process := range processes {
+		assert.Equal(t, "inMemory", maputil.ReadMapValueAsInterface(process.Args(), "storage", "engine"),
+			"Storage engine mismatch at index %d", i)
+		assert.Equal(t, 2048, maputil.ReadMapValueAsInterface(process.Args(), "replication", "oplogSizeMB"),
+			"OplogSizeMB mismatch at index %d", i)
+	}
+}
+
+func baseReplicaSet(name string, members int) *mdbv1.MongoDB {
+	return mdbv1.NewReplicaSetBuilder().
+		SetName(name).
+		SetNamespace(defaultNamespace).
+		SetMembers(members).
+		SetVersion("7.0.0").
+		SetFCVersion(defaultFCV).
+		Build()
+}
diff --git a/controllers/om/replicaset/om_replicaset.go b/controllers/om/replicaset/om_replicaset.go
index 2e72d2c3e..e9535fad8 100644
--- a/controllers/om/replicaset/om_replicaset.go
+++ b/controllers/om/replicaset/om_replicaset.go
@@ -30,6 +30,16 @@ func BuildFromStatefulSetWithReplicas(mongoDBImage string, forceEnterprise bool,
 	return rsWithProcesses
 }
 
+// BuildFromMongoDBWithReplicas returns a replica set that can be set in the Automation Config
+// based on the given MongoDB resource directly, without requiring a StatefulSet.
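+// The replicas parameter, not mdb.Spec.Members, controls how many processes are
+// created, so callers can build the config for an intermediate member count while scaling.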
+func BuildFromMongoDBWithReplicas(mongoDBImage string, forceEnterprise bool, mdb *mdbv1.MongoDB, replicas int, fcv string, tlsCertPath string) om.ReplicaSetWithProcesses {
+	members := process.CreateMongodProcessesFromMongoDB(mongoDBImage, forceEnterprise, mdb, replicas, fcv, tlsCertPath)
+	replicaSet := om.NewReplicaSet(mdb.Name, mdb.Spec.GetMongoDBVersion())
+	rsWithProcesses := om.NewReplicaSetWithProcesses(replicaSet, members, mdb.Spec.GetMemberOptions())
+	rsWithProcesses.SetHorizons(mdb.Spec.GetHorizonConfig())
+	return rsWithProcesses
+}
+
 // PrepareScaleDownFromMap performs additional steps necessary to make sure removed members are not primary (so no
 // election happens and replica set is available) (see
 // https://jira.mongodb.org/browse/HELP-3818?focusedCommentId=1548348 for more details)
@@ -65,30 +75,13 @@ func PrepareScaleDownFromMap(omClient om.Connection, rsMembers map[string][]stri
 		log.Debugw("Marked replica set members as non-voting", "replica set with members", rsMembers)
 	}
 
-	// TODO practice shows that automation agents can get stuck on setting db to "disabled" also it seems that this process
-	// works correctly without explicit disabling - feel free to remove this code after some time when it is clear
-	// that everything works correctly without disabling
-
-	// Stage 2. Set disabled to true
-	//err = omClient.ReadUpdateDeployment(
-	//	func(d om.Deployment) error {
-	//		d.DisableProcesses(allProcesses)
-	//		return nil
-	//	},
-	//)
-	//
-	//if err != nil {
-	//	return errors.New(fmt.Sprintf("Unable to set disabled to true, hosts: %v, err: %w", allProcesses, err))
-	//}
-	//log.Debugw("Disabled processes", "processes", allProcesses)
-
 	log.Infow("Performed some preliminary steps to support scale down", "hosts", processes)
 
 	return nil
 }
 
-func PrepareScaleDownFromStatefulSet(omClient om.Connection, statefulSet appsv1.StatefulSet, rs *mdbv1.MongoDB, log *zap.SugaredLogger) error {
-	_, podNames := dns.GetDnsForStatefulSetReplicasSpecified(statefulSet, rs.Spec.GetClusterDomain(), rs.Status.Members, nil)
+func PrepareScaleDownFromMongoDB(omClient om.Connection, rs *mdbv1.MongoDB, log *zap.SugaredLogger) error {
+	_, podNames := dns.GetDNSNames(rs.Name, rs.ServiceName(), rs.Namespace, rs.Spec.GetClusterDomain(), rs.Status.Members, rs.Spec.DbCommonSpec.GetExternalDomain())
 	podNames = podNames[scale.ReplicasThisReconciliation(rs):rs.Status.Members]
 
 	if len(podNames) != 1 {
diff --git a/controllers/om/replicaset/om_replicaset_test.go b/controllers/om/replicaset/om_replicaset_test.go
new file mode 100644
index 000000000..721c70efa
--- /dev/null
+++ b/controllers/om/replicaset/om_replicaset_test.go
@@ -0,0 +1,95 @@
+package replicaset
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"k8s.io/utils/ptr"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	mdbv1 "github.com/mongodb/mongodb-kubernetes/api/v1/mdb"
+	"github.com/mongodb/mongodb-kubernetes/controllers/om"
+	"github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/automationconfig"
+)
+
+// This test focuses on the integration/glue logic, not re-testing components.
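+// It builds the config for 3 replicas while spec.Members is 5 and verifies that
+// process names, hostnames, and per-member options are propagated.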
+func TestBuildFromMongoDBWithReplicas(t *testing.T) {
+	memberOptions := []automationconfig.MemberOptions{
+		{Votes: ptr.To(1), Priority: ptr.To("1.0")},
+		{Votes: ptr.To(1), Priority: ptr.To("0.5")},
+		{Votes: ptr.To(0), Priority: ptr.To("0")}, // Non-voting member
+	}
+
+	mdb := &mdbv1.MongoDB{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "test-rs",
+			Namespace: "test-namespace",
+		},
+		Spec: mdbv1.MongoDbSpec{
+			DbCommonSpec: mdbv1.DbCommonSpec{
+				Version: "7.0.5",
+				Security: &mdbv1.Security{
+					TLSConfig:      &mdbv1.TLSConfig{},
+					Authentication: &mdbv1.Authentication{},
+				},
+				Connectivity: &mdbv1.MongoDBConnectivity{
+					ReplicaSetHorizons: []mdbv1.MongoDBHorizonConfig{},
+				},
+			},
+			Members:      5, // Spec (target) is 5 members
+			MemberConfig: memberOptions,
+		},
+	}
+
+	// 3 replicas is less than spec.Members: a scale-up scenario
+	replicas := 3
+	rsWithProcesses := BuildFromMongoDBWithReplicas(
+		"mongodb/mongodb-enterprise-server:7.0.5",
+		false,
+		mdb,
+		replicas,
+		"7.0",
+		"",
+	)
+
+	// Assert: ReplicaSet structure
+	assert.Equal(t, "test-rs", rsWithProcesses.Rs.Name(), "ReplicaSet ID should match MongoDB name")
+	assert.Equal(t, "1", rsWithProcesses.Rs["protocolVersion"], "Protocol version should be set to 1 for this MongoDB version")
+
+	// Assert: Member count is controlled by the replicas parameter, NOT mdb.Spec.Members
+	members := rsWithProcesses.Rs["members"].([]om.ReplicaSetMember)
+	assert.Len(t, members, replicas, "Member count should match replicas parameter (3), not mdb.Spec.Members (5)")
+	assert.Equal(t, 3, len(members), "Should have exactly 3 members")
+
+	// Assert: Processes are created correctly
+	assert.Len(t, rsWithProcesses.Processes, replicas, "Process count should match replicas parameter")
+	expectedProcessNames := []string{"test-rs-0", "test-rs-1", "test-rs-2"}
+	expectedHostnames := []string{
+		"test-rs-0.test-rs-svc.test-namespace.svc.cluster.local",
+		"test-rs-1.test-rs-svc.test-namespace.svc.cluster.local",
+		"test-rs-2.test-rs-svc.test-namespace.svc.cluster.local",
+	}
+
+	for i := 0; i < replicas; i++ {
+		assert.Equal(t, expectedProcessNames[i], rsWithProcesses.Processes[i].Name(),
+			"Process name mismatch at index %d", i)
+		assert.Equal(t, expectedHostnames[i], rsWithProcesses.Processes[i].HostName(),
+			"Process hostname mismatch at index %d", i)
+	}
+
+	// Assert: Member options are propagated
+	assert.Equal(t, 1, members[0].Votes(), "Member 0 should have 1 vote")
+	assert.Equal(t, float32(1.0), members[0].Priority(), "Member 0 should have priority 1.0")
+	assert.Equal(t, 1, members[1].Votes(), "Member 1 should have 1 vote")
+	assert.Equal(t, float32(0.5), members[1].Priority(), "Member 1 should have priority 0.5")
+	assert.Equal(t, 0, members[2].Votes(), "Member 2 should have 0 votes (non-voting)")
+	assert.Equal(t, float32(0), members[2].Priority(), "Member 2 should have priority 0")
+
+	// Assert: Member host field contains the process name (not the full hostname)
+	// Note: ReplicaSetMember["host"] is the process name, not the full hostname
+	for i := 0; i < replicas; i++ {
+		assert.Equal(t, expectedProcessNames[i], members[i].Name(),
+			"Member host should match process name at index %d", i)
+	}
+}
diff --git a/controllers/operator/agents/agents.go b/controllers/operator/agents/agents.go
index c5eae2656..7366d6e83 100644
--- a/controllers/operator/agents/agents.go
+++ b/controllers/operator/agents/agents.go
@@ -96,6 +96,19 @@ func WaitForRsAgentsToRegister(set appsv1.StatefulSet, members int, clusterName
 	return nil
 }
 
+// WaitForRsAgentsToRegisterByResource waits for RS agents to register, using the MongoDB resource directly instead of a StatefulSet.
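+// Hostnames are computed via dns.GetDNSNames from the resource name, service, namespace, and cluster domain.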
+func WaitForRsAgentsToRegisterByResource(rs *mdbv1.MongoDB, members int, omConnection om.Connection, log *zap.SugaredLogger) error {
+	hostnames, _ := dns.GetDNSNames(rs.Name, rs.ServiceName(), rs.Namespace, rs.Spec.GetClusterDomain(), members, rs.Spec.DbCommonSpec.GetExternalDomain())
+
+	log = log.With("mongodb", rs.Name)
+
+	ok, msg := waitUntilRegistered(omConnection, log, retryParams{retrials: 5, waitSeconds: 3}, hostnames...)
+	if !ok {
+		return getAgentRegisterError(msg)
+	}
+	return nil
+}
+
 // WaitForRsAgentsToRegisterSpecifiedHostnames waits for the specified agents to register with Ops Manager.
 func WaitForRsAgentsToRegisterSpecifiedHostnames(omConnection om.Connection, hostnames []string, log *zap.SugaredLogger) error {
 	ok, msg := waitUntilRegistered(omConnection, log, retryParams{retrials: 10, waitSeconds: 9}, hostnames...)
diff --git a/controllers/operator/mongodbmultireplicaset_controller.go b/controllers/operator/mongodbmultireplicaset_controller.go
index 5d963c00e..386ce8a6b 100644
--- a/controllers/operator/mongodbmultireplicaset_controller.go
+++ b/controllers/operator/mongodbmultireplicaset_controller.go
@@ -166,11 +166,6 @@ func (r *ReconcileMongoDbMultiReplicaSet) Reconcile(ctx context.Context, request
 
 	r.SetupCommonWatchers(&mrs, nil, nil, mrs.Name)
 
-	publishAutomationConfigFirst, err := r.publishAutomationConfigFirstMultiCluster(ctx, &mrs, log)
-	if err != nil {
-		return r.updateStatus(ctx, &mrs, workflow.Failed(err), log)
-	}
-
 	// If tls is enabled we need to configure the "processes" array in opsManager/Cloud Manager with the
 	// correct tlsCertPath; with the new tls design, this path has the certHash in it (so that the cert can be rotated
 	// without pod restart).
@@ -210,6 +205,11 @@ func (r *ReconcileMongoDbMultiReplicaSet) Reconcile(ctx context.Context, request
 		}
 	}
 
+	publishAutomationConfigFirst, err := r.publishAutomationConfigFirstMultiCluster(ctx, &mrs, log)
+	if err != nil {
+		return r.updateStatus(ctx, &mrs, workflow.Failed(err), log)
+	}
+
 	status := workflow.RunInGivenOrder(publishAutomationConfigFirst,
 		func() workflow.Status {
 			if err := r.updateOmDeploymentRs(ctx, conn, mrs, agentCertPath, tlsCertPath, internalClusterCertPath, false, log); err != nil {
diff --git a/controllers/operator/mongodbreplicaset_controller.go b/controllers/operator/mongodbreplicaset_controller.go
index 470e56716..a16ac249b 100644
--- a/controllers/operator/mongodbreplicaset_controller.go
+++ b/controllers/operator/mongodbreplicaset_controller.go
@@ -46,7 +46,9 @@ import (
 	"github.com/mongodb/mongodb-kubernetes/controllers/searchcontroller"
 	mcoConstruct "github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/controllers/construct"
 	"github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/kube/annotations"
+	kubernetesClient "github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/kube/client"
 	"github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/kube/configmap"
+	"github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/kube/container"
 	"github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/util/merge"
 	"github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/util/scale"
 	"github.com/mongodb/mongodb-kubernetes/pkg/dns"
@@ -89,6 +91,13 @@ func newReplicaSetReconciler(ctx context.Context, kubeClient client.Client, imag
 	}
 }
 
+type deploymentOptionsRS struct {
+	agentCertPath        string
+	agentCertHash        string
+	prometheusCertHash   string
+	currentAgentAuthMode string
+}
+
 // Generic Kubernetes Resources
 // +kubebuilder:rbac:groups=core,resources=namespaces,verbs=list;watch,namespace=placeholder
 // +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch,namespace=placeholder
@@ -108,6 +117,7 @@ func newReplicaSetReconciler(ctx context.Context, kubeClient client.Client, imag
 // Reconcile reads the state of the cluster for a MongoDbReplicaSet object and makes changes based on the state read
 // and what is in the MongoDbReplicaSet.Spec
 func (r *ReconcileMongoDbReplicaSet) Reconcile(ctx context.Context, request reconcile.Request) (res reconcile.Result, e error) {
+	// === 1. Initial checks and setup
 	log := zap.S().With("ReplicaSet", request.NamespacedName)
 
 	rs := &mdbv1.MongoDB{}
@@ -155,153 +165,99 @@ func (r *ReconcileMongoDbReplicaSet) Reconcile(ctx context.Context, request reco
 		return r.updateStatus(ctx, rs, status, log)
 	}
 
-	status := certs.EnsureSSLCertsForStatefulSet(ctx, r.SecretClient, r.SecretClient, *rs.Spec.Security, certs.ReplicaSetConfig(*rs), log)
-	if !status.IsOK() {
-		return r.updateStatus(ctx, rs, status, log)
-	}
-
-	prometheusCertHash, err := certs.EnsureTLSCertsForPrometheus(ctx, r.SecretClient, rs.GetNamespace(), rs.GetPrometheus(), certs.Database, log)
-	if err != nil {
-		log.Infof("Could not generate certificates for Prometheus: %s", err)
-		return r.updateStatus(ctx, rs, workflow.Pending("%s", err.Error()), log)
-	}
-
 	if status := controlledfeature.EnsureFeatureControls(*rs, conn, conn.OpsManagerVersion(), log); !status.IsOK() {
 		return r.updateStatus(ctx, rs, status, log)
 	}
 
-	currentAgentAuthMode, err := conn.GetAgentAuthMode()
-	if err != nil {
-		return r.updateStatus(ctx, rs, workflow.Failed(err), log)
-	}
-
-	certConfigurator := certs.ReplicaSetX509CertConfigurator{MongoDB: rs, SecretClient: r.SecretClient}
-	status = r.ensureX509SecretAndCheckTLSType(ctx, certConfigurator, currentAgentAuthMode, log)
-	if !status.IsOK() {
-		return r.updateStatus(ctx, rs, status, log)
-	}
+	// === 2. Auth and certificates
+	// Get certificate paths for later use
 	rsCertsConfig := certs.ReplicaSetConfig(*rs)
 
-	var vaultConfig vault.VaultConfiguration
 	var databaseSecretPath string
 	if r.VaultClient != nil {
-		vaultConfig = r.VaultClient.VaultConfig
 		databaseSecretPath = r.VaultClient.DatabaseSecretPath()
 	}
+	tlsCertHash := enterprisepem.ReadHashFromSecret(ctx, r.SecretClient, rs.Namespace, rsCertsConfig.CertSecretName, databaseSecretPath, log)
+	internalClusterCertHash := enterprisepem.ReadHashFromSecret(ctx, r.SecretClient, rs.Namespace, rsCertsConfig.InternalClusterSecretName, databaseSecretPath, log)
 
-	var automationAgentVersion string
-	if architectures.IsRunningStaticArchitecture(rs.Annotations) {
-		// In case the Agent *is* overridden, its version will be merged into the StatefulSet. The merging process
-		// happens after creating the StatefulSet definition.
-		if !rs.IsAgentImageOverridden() {
-			automationAgentVersion, err = r.getAgentVersion(conn, conn.OpsManagerVersion().VersionString, false, log)
-			if err != nil {
-				log.Errorf("Impossible to get agent version, please override the agent image by providing a pod template")
-				status := workflow.Failed(xerrors.Errorf("Failed to get agent version: %w", err))
-				return r.updateStatus(ctx, rs, status, log)
-			}
-		}
+	tlsCertPath := ""
+	internalClusterCertPath := ""
+	if internalClusterCertHash != "" {
+		internalClusterCertPath = fmt.Sprintf("%s%s", util.InternalClusterAuthMountPath, internalClusterCertHash)
+	}
+	if tlsCertHash != "" {
+		tlsCertPath = fmt.Sprintf("%s/%s", util.TLSCertMountPath, tlsCertHash)
 	}
 
-	tlsCertHash := enterprisepem.ReadHashFromSecret(ctx, r.SecretClient, rs.Namespace, rsCertsConfig.CertSecretName, databaseSecretPath, log)
-	internalClusterCertHash := enterprisepem.ReadHashFromSecret(ctx, r.SecretClient, rs.Namespace, rsCertsConfig.InternalClusterSecretName, databaseSecretPath, log)
 	agentCertSecretName := rs.GetSecurity().AgentClientCertificateSecretName(rs.Name)
 	agentCertHash, agentCertPath := r.agentCertHashAndPath(ctx, log, rs.Namespace, agentCertSecretName, databaseSecretPath)
 
-	rsConfig := construct.ReplicaSetOptions(
-		PodEnvVars(newPodVars(conn, projectConfig, rs.Spec.LogLevel)),
-		CurrentAgentAuthMechanism(currentAgentAuthMode),
-		CertificateHash(tlsCertHash),
-		AgentCertHash(agentCertHash),
-		InternalClusterHash(internalClusterCertHash),
-		PrometheusTLSCertHash(prometheusCertHash),
-		WithVaultConfig(vaultConfig),
-		WithLabels(rs.Labels),
-		WithAdditionalMongodConfig(rs.Spec.GetAdditionalMongodConfig()),
-		WithInitDatabaseNonStaticImage(images.ContainerImage(r.imageUrls, util.InitDatabaseImageUrlEnv, r.initDatabaseNonStaticImageVersion)),
-		WithDatabaseNonStaticImage(images.ContainerImage(r.imageUrls, util.NonStaticDatabaseEnterpriseImage, r.databaseNonStaticImageVersion)),
-		WithAgentImage(images.ContainerImage(r.imageUrls, architectures.MdbAgentImageRepo, automationAgentVersion)),
-		WithMongodbImage(images.GetOfficialImage(r.imageUrls, rs.Spec.Version, rs.GetAnnotations())),
-	)
-
-	caFilePath := fmt.Sprintf("%s/ca-pem", util.TLSCaMountPath)
-
-	if err := r.reconcileHostnameOverrideConfigMap(ctx, log, r.client, *rs); err != nil {
-		return r.updateStatus(ctx, rs, workflow.Failed(xerrors.Errorf("Failed to reconcileHostnameOverrideConfigMap: %w", err)), log)
+	prometheusCertHash, err := certs.EnsureTLSCertsForPrometheus(ctx, r.SecretClient, rs.GetNamespace(), rs.GetPrometheus(), certs.Database, log)
+	if err != nil {
+		log.Infof("Could not generate certificates for Prometheus: %s", err)
+		return r.updateStatus(ctx, rs, workflow.Failed(err), log)
 	}
 
-	shouldMirrorKeyfile := r.applySearchOverrides(ctx, rs, log)
-
-	sts := construct.DatabaseStatefulSet(*rs, rsConfig, log)
-	if status := r.ensureRoles(ctx, rs.Spec.DbCommonSpec, r.enableClusterMongoDBRoles, conn, kube.ObjectKeyFromApiObject(rs), log); !status.IsOK() {
-		return r.updateStatus(ctx, rs, status, log)
+	currentAgentAuthMode, err := conn.GetAgentAuthMode()
+	if err != nil {
+		return r.updateStatus(ctx, rs, workflow.Failed(xerrors.Errorf("failed to get agent auth mode: %w", err)), log)
 	}
 
+	// Check if we need to prepare for scale-down
 	if scale.ReplicasThisReconciliation(rs) < rs.Status.Members {
-		if err := replicaset.PrepareScaleDownFromStatefulSet(conn, sts, rs, log); err != nil {
+		if err := replicaset.PrepareScaleDownFromMongoDB(conn, rs, log); err != nil {
 			return r.updateStatus(ctx, rs, workflow.Failed(xerrors.Errorf("Failed to prepare Replica Set for scaling down using Ops Manager: %w", err)), log)
workflow.Failed(xerrors.Errorf("Failed to prepare Replica Set for scaling down using Ops Manager: %w", err)), log) } } - - internalClusterCertPath := "" - if internalClusterCertHash != "" { - internalClusterCertPath = fmt.Sprintf("%s%s", util.InternalClusterAuthMountPath, internalClusterCertHash) + deploymentOpts := deploymentOptionsRS{ + prometheusCertHash: prometheusCertHash, + agentCertPath: agentCertPath, + agentCertHash: agentCertHash, + currentAgentAuthMode: currentAgentAuthMode, } - tlsCertPath := "" - if tlsCertHash != "" { - tlsCertPath = fmt.Sprintf("%s/%s", util.TLSCertMountPath, tlsCertHash) - } + // 3. Search Overrides + // Apply search overrides early so searchCoordinator role is present before ensureRoles runs + // This must happen before the ordering logic to ensure roles are synced regardless of order + shouldMirrorKeyfile := r.applySearchOverrides(ctx, rs, log) + + // 4. Recovery // Recovery prevents some deadlocks that can occur during reconciliation, e.g. the setting of an incorrect automation // configuration and a subsequent attempt to overwrite it later, the operator would be stuck in Pending phase. // See CLOUDP-189433 and CLOUDP-229222 for more details. if recovery.ShouldTriggerRecovery(rs.Status.Phase != mdbstatus.PhaseRunning, rs.Status.LastTransition) { log.Warnf("Triggering Automatic Recovery. The MongoDB resource %s/%s is in %s state since %s", rs.Namespace, rs.Name, rs.Status.Phase, rs.Status.LastTransition) - automationConfigStatus := r.updateOmDeploymentRs(ctx, conn, rs.Status.Members, rs, sts, log, agentCertPath, caFilePath, tlsCertPath, internalClusterCertPath, prometheusCertHash, true, shouldMirrorKeyfile).OnErrorPrepend("Failed to create/update (Ops Manager reconciliation phase):") - deploymentError := create.DatabaseInKubernetes(ctx, r.client, *rs, sts, rsConfig, log) - if deploymentError != nil { - log.Errorf("Recovery failed because of deployment errors, %w", deploymentError) + automationConfigStatus := r.updateOmDeploymentRs(ctx, conn, rs.Status.Members, rs, log, tlsCertPath, internalClusterCertPath, deploymentOpts, shouldMirrorKeyfile, true).OnErrorPrepend("Failed to create/update (Ops Manager reconciliation phase):") + reconcileStatus := r.reconcileMemberResources(ctx, rs, conn, log, projectConfig, deploymentOpts) + if !reconcileStatus.IsOK() { + log.Errorf("Recovery failed because of reconcile errors, %v", reconcileStatus) } if !automationConfigStatus.IsOK() { log.Errorf("Recovery failed because of Automation Config update errors, %v", automationConfigStatus) } } + // 5. 
+	// === 5. Actual reconciliation execution: update Ops Manager and Kubernetes resources
 	lastSpec, err := rs.GetLastSpec()
 	if err != nil {
 		lastSpec = &mdbv1.MongoDbSpec{}
 	}
-	status = workflow.RunInGivenOrder(publishAutomationConfigFirst(ctx, r.client, *rs, lastSpec, rsConfig, log),
+
+	publishAutomationConfigFirst := publishAutomationConfigFirstRS(ctx, r.client, *rs, lastSpec, deploymentOpts.currentAgentAuthMode, projectConfig.SSLMMSCAConfigMap, log)
+	status := workflow.RunInGivenOrder(publishAutomationConfigFirst,
 		func() workflow.Status {
-			return r.updateOmDeploymentRs(ctx, conn, rs.Status.Members, rs, sts, log, agentCertPath, caFilePath, tlsCertPath, internalClusterCertPath, prometheusCertHash, false, shouldMirrorKeyfile).OnErrorPrepend("Failed to create/update (Ops Manager reconciliation phase):")
+			return r.updateOmDeploymentRs(ctx, conn, rs.Status.Members, rs, log, tlsCertPath, internalClusterCertPath, deploymentOpts, shouldMirrorKeyfile, false).OnErrorPrepend("Failed to create/update (Ops Manager reconciliation phase):")
 		},
 		func() workflow.Status {
-			workflowStatus := create.HandlePVCResize(ctx, r.client, &sts, log)
-			if !workflowStatus.IsOK() {
-				return workflowStatus
-			}
-			if workflow.ContainsPVCOption(workflowStatus.StatusOptions()) {
-				_, _ = r.updateStatus(ctx, rs, workflow.Pending(""), log, workflowStatus.StatusOptions()...)
-			}
-
-			if err := create.DatabaseInKubernetes(ctx, r.client, *rs, sts, rsConfig, log); err != nil {
-				return workflow.Failed(xerrors.Errorf("Failed to create/update (Kubernetes reconciliation phase): %w", err))
-			}
-
-			if status := statefulset.GetStatefulSetStatus(ctx, rs.Namespace, rs.Name, r.client); !status.IsOK() {
-				return status
-			}
-
-			log.Info("Updated StatefulSet for replica set")
-			return workflow.OK()
+			return r.reconcileMemberResources(ctx, rs, conn, log, projectConfig, deploymentOpts)
 		})
 
 	if !status.IsOK() {
 		return r.updateStatus(ctx, rs, status, log)
 	}
 
+	// === 6. Final steps
 	if scale.IsStillScaling(rs) {
 		return r.updateStatus(ctx, rs, workflow.Pending("Continuing scaling operation for ReplicaSet %s, desiredMembers=%d, currentMembers=%d", rs.ObjectKey(), rs.DesiredReplicas(), scale.ReplicasThisReconciliation(rs)), log, mdbstatus.MembersOption(rs))
 	}
@@ -333,6 +289,57 @@ func (r *ReconcileMongoDbReplicaSet) Reconcile(ctx context.Context, request reco
 	return r.updateStatus(ctx, rs, workflow.OK(), log, mdbstatus.NewBaseUrlOption(deployment.Link(conn.BaseURL(), conn.GroupID())), mdbstatus.MembersOption(rs), mdbstatus.NewPVCsStatusOptionEmptyStatus())
 }
 
+func publishAutomationConfigFirstRS(ctx context.Context, getter kubernetesClient.Client, mdb mdbv1.MongoDB, lastSpec *mdbv1.MongoDbSpec, currentAgentAuthMode string, sslMMSCAConfigMap string, log *zap.SugaredLogger) bool {
+	namespacedName := kube.ObjectKey(mdb.Namespace, mdb.Name)
+	currentSts, err := getter.GetStatefulSet(ctx, namespacedName)
+	if err != nil {
+		if errors.IsNotFound(err) {
+			// No need to publish state as this is a new StatefulSet
+			log.Debugf("New StatefulSet %s", namespacedName)
+			return false
+		}
+
+		log.Debugw(fmt.Sprintf("Error getting StatefulSet %s", namespacedName), "error", err)
+		return false
+	}
+
+	databaseContainer := container.GetByName(util.DatabaseContainerName, currentSts.Spec.Template.Spec.Containers)
+	volumeMounts := databaseContainer.VolumeMounts
+
+	if !mdb.Spec.Security.IsTLSEnabled() && wasTLSSecretMounted(ctx, getter, currentSts, mdb, log) {
+		log.Debug(automationConfigFirstMsg("security.tls.enabled", "false"))
+		return true
+	}
+
+	if mdb.Spec.Security.TLSConfig.CA == "" && wasCAConfigMapMounted(ctx, getter, currentSts, mdb, log) {
+		log.Debug(automationConfigFirstMsg("security.tls.CA", "empty"))
+		return true
+	}
+
+	if sslMMSCAConfigMap == "" && statefulset.VolumeMountWithNameExists(volumeMounts, construct.CaCertName) {
+		log.Debug(automationConfigFirstMsg("SSLMMSCAConfigMap", "empty"))
+		return true
+	}
+
+	if mdb.Spec.Security.GetAgentMechanism(currentAgentAuthMode) != util.X509 && statefulset.VolumeMountWithNameExists(volumeMounts, util.AgentSecretName) {
+		log.Debug(automationConfigFirstMsg("project.AuthMode", "empty"))
+		return true
+	}
+
+	if mdb.Spec.Members < int(*currentSts.Spec.Replicas) {
+		log.Debug("Scaling down operation. automationConfig needs to be updated first")
+		return true
+	}
+
+	if architectures.IsRunningStaticArchitecture(mdb.GetAnnotations()) {
+		if mdb.Spec.IsInChangeVersion(lastSpec) {
+			return true
+		}
+	}
+
+	return false
+}
+
 func getHostnameOverrideConfigMapForReplicaset(mdb mdbv1.MongoDB) corev1.ConfigMap {
 	data := make(map[string]string)
 
@@ -368,6 +375,117 @@ func (r *ReconcileMongoDbReplicaSet) reconcileHostnameOverrideConfigMap(ctx cont
 	return nil
 }
 
+// reconcileMemberResources handles the synchronization of Kubernetes resources such as StatefulSets, Services, etc.
+// All the resources required in the k8s cluster (as opposed to the automation config) for creating the replica set
+// should be reconciled in this method.
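+// It is invoked from both the automatic-recovery path and the regular ordered reconciliation.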
+func (r *ReconcileMongoDbReplicaSet) reconcileMemberResources(ctx context.Context, rs *mdbv1.MongoDB, conn om.Connection,
+	log *zap.SugaredLogger, projectConfig mdbv1.ProjectConfig, deploymentOptions deploymentOptionsRS,
+) workflow.Status {
+	// Reconcile hostname override ConfigMap
+	if err := r.reconcileHostnameOverrideConfigMap(ctx, log, r.client, *rs); err != nil {
+		return workflow.Failed(xerrors.Errorf("Failed to reconcileHostnameOverrideConfigMap: %w", err))
+	}
+
+	// Ensure roles are properly configured
+	if status := r.ensureRoles(ctx, rs.Spec.DbCommonSpec, r.enableClusterMongoDBRoles, conn, kube.ObjectKeyFromApiObject(rs), log); !status.IsOK() {
+		return status
+	}
+
+	return r.reconcileStatefulSet(ctx, rs, log, conn, projectConfig, deploymentOptions)
+}
+
+func (r *ReconcileMongoDbReplicaSet) reconcileStatefulSet(ctx context.Context, rs *mdbv1.MongoDB,
+	log *zap.SugaredLogger, conn om.Connection, projectConfig mdbv1.ProjectConfig, deploymentOptions deploymentOptionsRS,
+) workflow.Status {
+	certConfigurator := certs.ReplicaSetX509CertConfigurator{MongoDB: rs, SecretClient: r.SecretClient}
+	status := r.ensureX509SecretAndCheckTLSType(ctx, certConfigurator, deploymentOptions.currentAgentAuthMode, log)
+	if !status.IsOK() {
+		return status
+	}
+
+	status = certs.EnsureSSLCertsForStatefulSet(ctx, r.SecretClient, r.SecretClient, *rs.Spec.Security, certs.ReplicaSetConfig(*rs), log)
+	if !status.IsOK() {
+		return status
+	}
+
+	// Build the replica set config
+	rsConfig, err := r.buildStatefulSetOptions(ctx, rs, conn, projectConfig, deploymentOptions.currentAgentAuthMode, deploymentOptions.prometheusCertHash, deploymentOptions.agentCertHash, log)
+	if err != nil {
+		return workflow.Failed(xerrors.Errorf("failed to build StatefulSet options: %w", err))
+	}
+
+	sts := construct.DatabaseStatefulSet(*rs, rsConfig, log)
+
+	// Handle PVC resize if needed
+	workflowStatus := create.HandlePVCResize(ctx, r.client, &sts, log)
+	if !workflowStatus.IsOK() {
+		return workflowStatus
+	}
+	if workflow.ContainsPVCOption(workflowStatus.StatusOptions()) {
+		_, _ = r.updateStatus(ctx, rs, workflow.Pending(""), log, workflowStatus.StatusOptions()...)
+	}
+
+	// Create or update the StatefulSet in Kubernetes
+	if err := create.DatabaseInKubernetes(ctx, r.client, *rs, sts, rsConfig, log); err != nil {
+		return workflow.Failed(xerrors.Errorf("Failed to create/update (Kubernetes reconciliation phase): %w", err))
+	}
+
+	// Check StatefulSet status
+	if status := statefulset.GetStatefulSetStatus(ctx, rs.Namespace, rs.Name, r.client); !status.IsOK() {
+		return status
+	}
+
+	log.Info("Updated StatefulSet for replica set")
+	return workflow.OK()
+}
+
+// buildStatefulSetOptions creates the options needed for constructing the StatefulSet
+func (r *ReconcileMongoDbReplicaSet) buildStatefulSetOptions(ctx context.Context, rs *mdbv1.MongoDB, conn om.Connection, projectConfig mdbv1.ProjectConfig, currentAgentAuthMode string, prometheusCertHash string, agentCertHash string, log *zap.SugaredLogger) (func(mdb mdbv1.MongoDB) construct.DatabaseStatefulSetOptions, error) {
+	rsCertsConfig := certs.ReplicaSetConfig(*rs)
+
+	var vaultConfig vault.VaultConfiguration
+	var databaseSecretPath string
+	if r.VaultClient != nil {
+		vaultConfig = r.VaultClient.VaultConfig
+		databaseSecretPath = r.VaultClient.DatabaseSecretPath()
+	}
+
+	// Determine automation agent version for static architecture
+	var automationAgentVersion string
+	if architectures.IsRunningStaticArchitecture(rs.Annotations) {
+		// In case the Agent *is* overridden, its version will be merged into the StatefulSet. The merging process
+		// happens after creating the StatefulSet definition.
+		if !rs.IsAgentImageOverridden() {
+			var err error
+			automationAgentVersion, err = r.getAgentVersion(conn, conn.OpsManagerVersion().VersionString, false, log)
+			if err != nil {
+				return nil, xerrors.Errorf("Impossible to get agent version, please override the agent image by providing a pod template: %w", err)
+			}
+		}
+	}
+
+	tlsCertHash := enterprisepem.ReadHashFromSecret(ctx, r.SecretClient, rs.Namespace, rsCertsConfig.CertSecretName, databaseSecretPath, log)
+	internalClusterCertHash := enterprisepem.ReadHashFromSecret(ctx, r.SecretClient, rs.Namespace, rsCertsConfig.InternalClusterSecretName, databaseSecretPath, log)
+
+	rsConfig := construct.ReplicaSetOptions(
+		PodEnvVars(newPodVars(conn, projectConfig, rs.Spec.LogLevel)),
+		CurrentAgentAuthMechanism(currentAgentAuthMode),
+		CertificateHash(tlsCertHash),
+		AgentCertHash(agentCertHash),
+		InternalClusterHash(internalClusterCertHash),
+		PrometheusTLSCertHash(prometheusCertHash),
+		WithVaultConfig(vaultConfig),
+		WithLabels(rs.Labels),
+		WithAdditionalMongodConfig(rs.Spec.GetAdditionalMongodConfig()),
+		WithInitDatabaseNonStaticImage(images.ContainerImage(r.imageUrls, util.InitDatabaseImageUrlEnv, r.initDatabaseNonStaticImageVersion)),
+		WithDatabaseNonStaticImage(images.ContainerImage(r.imageUrls, util.NonStaticDatabaseEnterpriseImage, r.databaseNonStaticImageVersion)),
+		WithAgentImage(images.ContainerImage(r.imageUrls, architectures.MdbAgentImageRepo, automationAgentVersion)),
+		WithMongodbImage(images.GetOfficialImage(r.imageUrls, rs.Spec.Version, rs.GetAnnotations())),
+	)
+
+	return rsConfig, nil
+}
+
 // AddReplicaSetController creates a new MongoDbReplicaset Controller and adds it to the Manager. The Manager will set fields on the Controller
 // and Start it when the Manager is Started.
 func AddReplicaSetController(ctx context.Context, mgr manager.Manager, imageUrls images.ImageUrls, initDatabaseNonStaticImageVersion, databaseNonStaticImageVersion string, forceEnterprise bool, enableClusterMongoDBRoles bool) error {
@@ -449,36 +567,47 @@ func AddReplicaSetController(ctx context.Context, mgr manager.Manager, imageUrls
 
 // updateOmDeploymentRs performs the OM registration operation for the replica set, so the changes
 // are finally propagated to the automation agents in the containers
-func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, conn om.Connection, membersNumberBefore int, rs *mdbv1.MongoDB, set appsv1.StatefulSet, log *zap.SugaredLogger, agentCertPath, caFilePath, tlsCertPath, internalClusterCertPath string, prometheusCertHash string, isRecovering bool, shouldMirrorKeyfileForMongot bool) workflow.Status {
+func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, conn om.Connection, membersNumberBefore int, rs *mdbv1.MongoDB, log *zap.SugaredLogger, tlsCertPath, internalClusterCertPath string, deploymentOptionsRS deploymentOptionsRS, shouldMirrorKeyfile bool, isRecovering bool) workflow.Status {
 	log.Debug("Entering UpdateOMDeployments")
 	// Only "concrete" RS members should be observed
 	// - if scaling down, let's observe only members that will remain after scale-down operation
 	// - if scaling up, observe only current members, because new ones might not exist yet
-	err := agents.WaitForRsAgentsToRegister(set, util_int.Min(membersNumberBefore, int(*set.Spec.Replicas)), rs.Spec.GetClusterDomain(), conn, log, rs)
+	replicasTarget := scale.ReplicasThisReconciliation(rs)
+	err := agents.WaitForRsAgentsToRegisterByResource(rs, util_int.Min(membersNumberBefore, replicasTarget), conn, log)
 	if err != nil && !isRecovering {
 		return workflow.Failed(err)
 	}
 
+	caFilePath := fmt.Sprintf("%s/ca-pem", util.TLSCaMountPath)
 	// If the current operation is to disable TLS, then we should lock the current members of the Replica Set,
 	// that is, not scale them up or down until TLS disabling has completed.
-	shouldLockMembers, err := updateOmDeploymentDisableTLSConfiguration(conn, r.imageUrls[mcoConstruct.MongodbImageEnv], r.forceEnterprise, membersNumberBefore, rs, set, log, caFilePath, tlsCertPath)
+	shouldLockMembers, err := updateOmDeploymentDisableTLSConfiguration(conn, r.imageUrls[mcoConstruct.MongodbImageEnv], r.forceEnterprise, membersNumberBefore, rs, log, caFilePath, tlsCertPath)
 	if err != nil && !isRecovering {
 		return workflow.Failed(err)
 	}
 
 	var updatedMembers int
+	// This member-locking logic will be removed soon; instead, we should block disabling TLS and scaling in the same change.
+	// Tracked in CLOUDP-349087
 	if shouldLockMembers {
 		// We should not add or remove members during this run, we'll wait for
 		// TLS to be completely disabled first.
-		updatedMembers = membersNumberBefore
+		// However, on first reconciliation (membersNumberBefore=0), we need to use replicasTarget
+		// because the OM deployment is initialized with TLS enabled by default.
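+		// Locking to 0 members here would otherwise build an empty process list for the automation config.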
+ log.Debugf("locking members for this reconciliation because TLS was disabled") + if membersNumberBefore == 0 { + updatedMembers = replicasTarget + } else { + updatedMembers = membersNumberBefore + } } else { - updatedMembers = int(*set.Spec.Replicas) + updatedMembers = replicasTarget } - replicaSet := replicaset.BuildFromStatefulSetWithReplicas(r.imageUrls[mcoConstruct.MongodbImageEnv], r.forceEnterprise, set, rs.GetSpec(), updatedMembers, rs.CalculateFeatureCompatibilityVersion(), tlsCertPath) + replicaSet := replicaset.BuildFromMongoDBWithReplicas(r.imageUrls[mcoConstruct.MongodbImageEnv], r.forceEnterprise, rs, updatedMembers, rs.CalculateFeatureCompatibilityVersion(), tlsCertPath) processNames := replicaSet.GetProcessNames() - status, additionalReconciliationRequired := r.updateOmAuthentication(ctx, conn, processNames, rs, agentCertPath, caFilePath, internalClusterCertPath, isRecovering, log) + status, additionalReconciliationRequired := r.updateOmAuthentication(ctx, conn, processNames, rs, deploymentOptionsRS.agentCertPath, caFilePath, internalClusterCertPath, isRecovering, log) if !status.IsOK() && !isRecovering { return status } @@ -488,22 +617,22 @@ func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, c return workflow.Failed(err) } - p := PrometheusConfiguration{ + prometheusConfiguration := PrometheusConfiguration{ prometheus: rs.GetPrometheus(), conn: conn, secretsClient: r.SecretClient, namespace: rs.GetNamespace(), - prometheusCertHash: prometheusCertHash, + prometheusCertHash: deploymentOptionsRS.prometheusCertHash, } err = conn.ReadUpdateDeployment( func(d om.Deployment) error { - if shouldMirrorKeyfileForMongot { + if shouldMirrorKeyfile { if err := r.mirrorKeyfileIntoSecretForMongot(ctx, d, rs, log); err != nil { return err } } - return ReconcileReplicaSetAC(ctx, d, rs.Spec.DbCommonSpec, lastRsConfig.ToMap(), rs.Name, replicaSet, caFilePath, internalClusterCertPath, &p, log) + return ReconcileReplicaSetAC(ctx, d, rs.Spec.DbCommonSpec, lastRsConfig.ToMap(), rs.Name, replicaSet, caFilePath, internalClusterCertPath, &prometheusConfiguration, log) }, log, ) @@ -525,9 +654,8 @@ func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, c return workflow.Pending("Performing multi stage reconciliation") } - externalDomain := rs.Spec.DbCommonSpec.GetExternalDomain() - hostsBefore := getAllHostsRs(set, rs.Spec.GetClusterDomain(), membersNumberBefore, externalDomain) - hostsAfter := getAllHostsRs(set, rs.Spec.GetClusterDomain(), scale.ReplicasThisReconciliation(rs), externalDomain) + hostsBefore := getAllHostsForReplicas(rs, membersNumberBefore) + hostsAfter := getAllHostsForReplicas(rs, scale.ReplicasThisReconciliation(rs)) if err := host.CalculateDiffAndStopMonitoring(conn, hostsBefore, hostsAfter, log); err != nil && !isRecovering { return workflow.Failed(err) @@ -544,7 +672,7 @@ func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, c // updateOmDeploymentDisableTLSConfiguration checks if TLS configuration needs // to be disabled. In which case it will disable it and inform to the calling // function. 
-func updateOmDeploymentDisableTLSConfiguration(conn om.Connection, mongoDBImage string, forceEnterprise bool, membersNumberBefore int, rs *mdbv1.MongoDB, set appsv1.StatefulSet, log *zap.SugaredLogger, caFilePath, tlsCertPath string) (bool, error) {
+func updateOmDeploymentDisableTLSConfiguration(conn om.Connection, mongoDBImage string, forceEnterprise bool, membersNumberBefore int, rs *mdbv1.MongoDB, log *zap.SugaredLogger, caFilePath, tlsCertPath string) (bool, error) {
 	tlsConfigWasDisabled := false
 
 	err := conn.ReadUpdateDeployment(
@@ -558,7 +686,7 @@ func updateOmDeploymentDisableTLSConfiguration(conn om.Connection, mongoDBImage
 			// configure as many agents/Pods as we currently have, no more (in case
 			// there's a scale up change at the same time).
-			replicaSet := replicaset.BuildFromStatefulSetWithReplicas(mongoDBImage, forceEnterprise, set, rs.GetSpec(), membersNumberBefore, rs.CalculateFeatureCompatibilityVersion(), tlsCertPath)
+			replicaSet := replicaset.BuildFromMongoDBWithReplicas(mongoDBImage, forceEnterprise, rs, membersNumberBefore, rs.CalculateFeatureCompatibilityVersion(), tlsCertPath)
 
 			lastConfig, err := rs.GetLastAdditionalMongodConfigByType(mdbv1.ReplicaSetConfig)
 			if err != nil {
@@ -639,8 +767,8 @@ func (r *ReconcileMongoDbReplicaSet) OnDelete(ctx context.Context, obj runtime.O
 	return nil
 }
 
-func getAllHostsRs(set appsv1.StatefulSet, clusterName string, membersCount int, externalDomain *string) []string {
-	hostnames, _ := dns.GetDnsForStatefulSetReplicasSpecified(set, clusterName, membersCount, externalDomain)
+func getAllHostsForReplicas(rs *mdbv1.MongoDB, membersCount int) []string {
+	hostnames, _ := dns.GetDNSNames(rs.Name, rs.ServiceName(), rs.Namespace, rs.Spec.GetClusterDomain(), membersCount, rs.Spec.DbCommonSpec.GetExternalDomain())
 	return hostnames
 }
diff --git a/controllers/operator/mongodbreplicaset_controller_test.go b/controllers/operator/mongodbreplicaset_controller_test.go
index e85e7050d..787883ffb 100644
--- a/controllers/operator/mongodbreplicaset_controller_test.go
+++ b/controllers/operator/mongodbreplicaset_controller_test.go
@@ -396,26 +396,24 @@ func TestUpdateDeploymentTLSConfiguration(t *testing.T) {
 	rsNoTLS := mdbv1.NewReplicaSetBuilder().Build()
 	deploymentWithTLS := deployment.CreateFromReplicaSet("fake-mongoDBImage", false, rsWithTLS)
 	deploymentNoTLS := deployment.CreateFromReplicaSet("fake-mongoDBImage", false, rsNoTLS)
-	stsWithTLS := construct.DatabaseStatefulSet(*rsWithTLS, construct.ReplicaSetOptions(construct.GetPodEnvOptions()), zap.S())
-	stsNoTLS := construct.DatabaseStatefulSet(*rsNoTLS, construct.ReplicaSetOptions(construct.GetPodEnvOptions()), zap.S())
 
 	// TLS Disabled -> TLS Disabled
-	shouldLockMembers, err := updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentNoTLS), "fake-mongoDBImage", false, 3, rsNoTLS, stsNoTLS, zap.S(), util.CAFilePathInContainer, "")
+	shouldLockMembers, err := updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentNoTLS), "fake-mongoDBImage", false, 3, rsNoTLS, zap.S(), util.CAFilePathInContainer, "")
 	assert.NoError(t, err)
 	assert.False(t, shouldLockMembers)
 
 	// TLS Disabled -> TLS Enabled
-	shouldLockMembers, err = updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentNoTLS), "fake-mongoDBImage", false, 3, rsWithTLS, stsWithTLS, zap.S(), util.CAFilePathInContainer, "")
+	shouldLockMembers, err = updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentNoTLS), "fake-mongoDBImage", false, 3, rsWithTLS, zap.S(), util.CAFilePathInContainer, "")
util.CAFilePathInContainer, "") assert.NoError(t, err) assert.False(t, shouldLockMembers) // TLS Enabled -> TLS Enabled - shouldLockMembers, err = updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentWithTLS), "fake-mongoDBImage", false, 3, rsWithTLS, stsWithTLS, zap.S(), util.CAFilePathInContainer, "") + shouldLockMembers, err = updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentWithTLS), "fake-mongoDBImage", false, 3, rsWithTLS, zap.S(), util.CAFilePathInContainer, "") assert.NoError(t, err) assert.False(t, shouldLockMembers) // TLS Enabled -> TLS Disabled - shouldLockMembers, err = updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentWithTLS), "fake-mongoDBImage", false, 3, rsNoTLS, stsNoTLS, zap.S(), util.CAFilePathInContainer, "") + shouldLockMembers, err = updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentWithTLS), "fake-mongoDBImage", false, 3, rsNoTLS, zap.S(), util.CAFilePathInContainer, "") assert.NoError(t, err) assert.True(t, shouldLockMembers) } @@ -1202,3 +1200,204 @@ func (b *ReplicaSetBuilder) Build() *mdbv1.MongoDB { b.InitDefaults() return b.DeepCopy() } + +// Helper functions for TestPublishAutomationConfigFirstRS + +func baseTestStatefulSet(name string, replicas int32) *appsv1.StatefulSet { + return &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: mock.TestNamespace, + }, + Spec: appsv1.StatefulSetSpec{ + Replicas: ptr.To(replicas), + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: util.DatabaseContainerName}, + }, + }, + }, + }, + } +} + +func baseTestMongoDB(name string, members int) mdbv1.MongoDB { + return mdbv1.MongoDB{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: mock.TestNamespace, + }, + Spec: mdbv1.MongoDbSpec{ + DbCommonSpec: mdbv1.DbCommonSpec{ + Security: &mdbv1.Security{ + TLSConfig: &mdbv1.TLSConfig{}, + Authentication: &mdbv1.Authentication{}, + }, + }, + Members: members, + }, + } +} + +// TestPublishAutomationConfigFirstRS tests the publishAutomationConfigFirstRS function which determines +// whether the OM automation config should be updated before the StatefulSet in certain scenarios +// (e.g., TLS disabled, CA removed, scaling down, agent auth changes, version changes). 
+func TestPublishAutomationConfigFirstRS(t *testing.T) {
+	ctx := context.Background()
+
+	testCases := []struct {
+		name                   string
+		existingSts            *appsv1.StatefulSet
+		mdb                    mdbv1.MongoDB
+		lastSpec               *mdbv1.MongoDbSpec
+		currentAgentAuthMode   string
+		sslMMSCAConfigMap      string
+		extraObjects           []client.Object
+		expectedPublishACFirst bool
+	}{
+		{
+			name:        "New StatefulSet",
+			existingSts: nil,
+			mdb: func() mdbv1.MongoDB {
+				m := baseTestMongoDB("test-rs", 3)
+				m.Spec.Security = nil // Simple case without security
+				return m
+			}(),
+			expectedPublishACFirst: false,
+		},
+		{
+			name:                   "Scaling down",
+			existingSts:            baseTestStatefulSet("test-rs", 5),
+			mdb:                    baseTestMongoDB("test-rs", 3),
+			expectedPublishACFirst: true,
+		},
+		{
+			name: "TLS disabled with mounted cert",
+			existingSts: func() *appsv1.StatefulSet {
+				sts := baseTestStatefulSet("test-rs", 3)
+				sts.Spec.Template.Spec.Volumes = []corev1.Volume{
+					{
+						Name: util.SecretVolumeName,
+						VolumeSource: corev1.VolumeSource{
+							Secret: &corev1.SecretVolumeSource{SecretName: "test-rs-cert"},
+						},
+					},
+				}
+				sts.Spec.Template.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{
+					{Name: util.SecretVolumeName, MountPath: "/tls"},
+				}
+				return sts
+			}(),
+			mdb: func() mdbv1.MongoDB {
+				m := baseTestMongoDB("test-rs", 3)
+				m.Spec.Security.TLSConfig.Enabled = false
+				return m
+			}(),
+			extraObjects: []client.Object{
+				&corev1.Secret{
+					ObjectMeta: metav1.ObjectMeta{Name: "test-rs-cert", Namespace: mock.TestNamespace},
+				},
+			},
+			expectedPublishACFirst: true,
+		},
+		{
+			name: "CA configmap removed",
+			existingSts: func() *appsv1.StatefulSet {
+				sts := baseTestStatefulSet("test-rs", 3)
+				sts.Spec.Template.Spec.Volumes = []corev1.Volume{
+					{
+						Name: util.ConfigMapVolumeCAMountPath,
+						VolumeSource: corev1.VolumeSource{
+							ConfigMap: &corev1.ConfigMapVolumeSource{
+								LocalObjectReference: corev1.LocalObjectReference{Name: "custom-ca-configmap"},
+							},
+						},
+					},
+				}
+				sts.Spec.Template.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{
+					{Name: util.ConfigMapVolumeCAMountPath, MountPath: "/ca"},
+				}
+				return sts
+			}(),
+			mdb: func() mdbv1.MongoDB {
+				m := baseTestMongoDB("test-rs", 3)
+				m.Spec.Security.TLSConfig.Enabled = true
+				m.Spec.Security.TLSConfig.CA = ""
+				return m
+			}(),
+			extraObjects: []client.Object{
+				&corev1.ConfigMap{
+					ObjectMeta: metav1.ObjectMeta{Name: "custom-ca-configmap", Namespace: mock.TestNamespace},
+				},
+			},
+			expectedPublishACFirst: true,
+		},
+		{
+			name: "SSL MMS CA removed",
+			existingSts: func() *appsv1.StatefulSet {
+				sts := baseTestStatefulSet("test-rs", 3)
+				sts.Spec.Template.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{
+					{Name: construct.CaCertName, MountPath: "/mms-ca"},
+				}
+				return sts
+			}(),
+			mdb:                    baseTestMongoDB("test-rs", 3),
+			sslMMSCAConfigMap:      "",
+			expectedPublishACFirst: true,
+		},
+		{
+			name: "Agent X509 disabled",
+			existingSts: func() *appsv1.StatefulSet {
+				sts := baseTestStatefulSet("test-rs", 3)
+				sts.Spec.Template.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{
+					{Name: util.AgentSecretName, MountPath: "/agent-certs"},
+				}
+				return sts
+			}(),
+			mdb: func() mdbv1.MongoDB {
+				m := baseTestMongoDB("test-rs", 3)
+				m.Spec.Security.Authentication.Agents = mdbv1.AgentAuthentication{Mode: "SCRAM"}
+				return m
+			}(),
+			currentAgentAuthMode:   "X509",
+			expectedPublishACFirst: true,
+		},
+		{
+			name:        "Version change in static architecture",
+			existingSts: baseTestStatefulSet("test-rs", 3),
+			mdb: func() mdbv1.MongoDB {
+				m := baseTestMongoDB("test-rs", 3)
+				m.Annotations = map[string]string{
+					architectures.ArchitectureAnnotation: string(architectures.Static),
+				}
+				m.Spec.Version = "8.0.0"
+				return m
+			}(),
+			lastSpec: &mdbv1.MongoDbSpec{
+				DbCommonSpec: mdbv1.DbCommonSpec{Version: "7.0.0"},
+			},
+			expectedPublishACFirst: true,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			objects := []client.Object{}
+			if tc.existingSts != nil {
+				objects = append(objects, tc.existingSts)
+			}
+			objects = append(objects, tc.extraObjects...)
+
+			fakeClient := mock.NewEmptyFakeClientBuilder().
+				WithObjects(objects...).
+				WithObjects(mock.GetDefaultResources()...).
+				Build()
+			kubeClient := kubernetesClient.NewClient(fakeClient)
+
+			result := publishAutomationConfigFirstRS(ctx, kubeClient, tc.mdb, tc.lastSpec, tc.currentAgentAuthMode, tc.sslMMSCAConfigMap, zap.S())
+
+			assert.Equal(t, tc.expectedPublishACFirst, result)
+		})
+	}
+}
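
Reviewer note: a minimal usage sketch of the new StatefulSet-free flow, using only functions introduced in this patch; the wrapper function, package name, and image string are illustrative, not part of the change.

package example

import (
	mdbv1 "github.com/mongodb/mongodb-kubernetes/api/v1/mdb"
	"github.com/mongodb/mongodb-kubernetes/controllers/om/replicaset"
)

// buildConfigForScaling builds the automation-config replica set for an
// intermediate member count during scaling, with no StatefulSet lookup.
func buildConfigForScaling(mdb *mdbv1.MongoDB, replicasThisReconciliation int) []string {
	rsWithProcesses := replicaset.BuildFromMongoDBWithReplicas(
		"mongodb/mongodb-enterprise-server:7.0.5", // illustrative image
		false, // forceEnterprise
		mdb,
		replicasThisReconciliation, // controls process count, not mdb.Spec.Members
		mdb.CalculateFeatureCompatibilityVersion(),
		"", // tlsCertPath is empty when TLS is disabled
	)
	return rsWithProcesses.GetProcessNames()
}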