From 675ce20fc4549b2bea71fb7ab74daa65aab25c07 Mon Sep 17 00:00:00 2001
From: Jakub Koterba
Date: Wed, 8 Apr 2026 10:02:21 +0200
Subject: [PATCH 1/2] sync logic rewrite + tests and constants

logging changed, pureness fix attempt
removed redundant sync at the end
incremental state building with limited redundancy logging
align cluster unit adj
merge adjustments
event emission placed back, fortified with tests
align with requirements on state building
review and rebase changes
merge alignment
review changes
---
 .../postgrescluster_controller_test.go        |   91 +-
 pkg/postgresql/cluster/core/cluster.go        | 1518 +++++++++++++----
 .../cluster/core/cluster_unit_test.go         |  896 +++++++++-
 pkg/postgresql/cluster/core/events.go         |   11 +
 pkg/postgresql/cluster/core/types.go          |   87 +-
 .../core/types/constants/components.go        |    9 +
 .../cluster/core/types/constants/state.go     |   24 +
 7 files changed, 2214 insertions(+), 422 deletions(-)
 create mode 100644 pkg/postgresql/cluster/core/types/constants/components.go
 create mode 100644 pkg/postgresql/cluster/core/types/constants/state.go

diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go
index 80b412562..b7becdff5 100644
--- a/internal/controller/postgrescluster_controller_test.go
+++ b/internal/controller/postgrescluster_controller_test.go
@@ -19,7 +19,9 @@ package controller
 import (
 	"context"
 	"fmt"
+	"strings"
 
+	v1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/api/meta"
 	"k8s.io/apimachinery/pkg/api/resource"
@@ -52,6 +54,20 @@ import (
  * PC-09 ignores no-op updates
  */
 
+func containsEvents(events *[]string, recorder *record.FakeRecorder, eventType string, event string) bool {
+	for {
+		select {
+		case e := <-recorder.Events:
+			*events = append(*events, e)
+			if strings.Contains(e, eventType) && strings.Contains(e, event) {
+				return true
+			}
+		default:
+			return false
+		}
+	}
+}
+
 var _ = Describe("PostgresCluster Controller", Label("postgres"), func() {
 
 	const (
@@ -79,6 +95,7 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() {
 		pgClusterClassKey types.NamespacedName
 		reconciler        *PostgresClusterReconciler
 		req               reconcile.Request
+		fakeRecorder      *record.FakeRecorder
 	)
 
 	reconcileNTimes := func(times int) {
@@ -162,13 +179,17 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() {
 			Spec: enterprisev4.PostgresClusterSpec{
 				Class:                 className,
 				ClusterDeletionPolicy: ptr.To(deletePolicy),
+				ManagedRoles: []enterprisev4.ManagedRole{
+					{Name: "app_user", Exists: true},
+					{Name: "app_user_rw", Exists: true},
+				},
 			},
 		}
-
+		fakeRecorder = record.NewFakeRecorder(100)
 		reconciler = &PostgresClusterReconciler{
 			Client:         k8sClient,
 			Scheme:         k8sClient.Scheme(),
-			Recorder:       record.NewFakeRecorder(100),
+			Recorder:       fakeRecorder,
 			Metrics:        &pgprometheus.NoopRecorder{},
 			FleetCollector: pgprometheus.NewFleetCollector(),
 		}
@@ -255,12 +276,17 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() {
 			cond := meta.FindStatusCondition(pc.Status.Conditions, "ClusterReady")
 			Expect(cond).NotTo(BeNil())
 			Expect(cond.Status).To(Equal(metav1.ConditionFalse))
-			Expect(cond.Reason).To(Equal("ClusterBuildSucceeded"))
+			Expect(cond.Reason).To(Equal("CNPGClusterProvisioning"))
 
 			// Simulate external CNPG controller status progression.
cnpg := &cnpgv1.Cluster{} Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed()) cnpg.Status.Phase = cnpgv1.PhaseHealthy + cnpg.Status.ManagedRolesStatus = cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusReconciled: {"app_user", "app_user_rw"}, + }, + } Expect(k8sClient.Status().Update(ctx, cnpg)).To(Succeed()) reconcileNTimes(1) @@ -270,9 +296,47 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Expect(cond).NotTo(BeNil()) Expect(cond.Status).To(Equal(metav1.ConditionTrue)) Expect(cond.Reason).To(Equal("CNPGClusterHealthy")) + + secretCond := meta.FindStatusCondition(pc.Status.Conditions, "SecretsReady") + Expect(secretCond).NotTo(BeNil()) + Expect(secretCond.Status).To(Equal(metav1.ConditionTrue)) + Expect(secretCond.Reason).To(Equal("SuperUserSecretReady")) + + configMapCond := meta.FindStatusCondition(pc.Status.Conditions, "ConfigMapsReady") + Expect(configMapCond).NotTo(BeNil()) + Expect(configMapCond.Status).To(Equal(metav1.ConditionTrue)) + Expect(configMapCond.Reason).To(Equal("ConfigMapReconciled")) + + managedRolesCond := meta.FindStatusCondition(pc.Status.Conditions, "ManagedRolesReady") + Expect(managedRolesCond).NotTo(BeNil()) + Expect(managedRolesCond.Status).To(Equal(metav1.ConditionTrue)) + Expect(managedRolesCond.Reason).To(Equal("ManagedRolesReconciled")) + + // Pooler is disabled in this suite fixture, but converge publishes PoolerReady=True with disabled message. + poolerCond := meta.FindStatusCondition(pc.Status.Conditions, "PoolerReady") + Expect(poolerCond).NotTo(BeNil()) + Expect(poolerCond.Status).To(Equal(metav1.ConditionTrue)) + Expect(poolerCond.Reason).To(Equal("AllInstancesReady")) + Expect(poolerCond.Message).To(Equal("Connection pooler disabled")) + + Expect(pc.Status.ManagedRolesStatus).NotTo(BeNil()) + Expect(pc.Status.ManagedRolesStatus.Reconciled).To(ContainElements("app_user", "app_user_rw")) + + Expect(pc.Status.Phase).NotTo(BeNil()) + Expect(*pc.Status.Phase).To(Equal("Ready")) + Expect(pc.Status.ProvisionerRef).NotTo(BeNil()) + Expect(pc.Status.ProvisionerRef.Kind).To(Equal("Cluster")) + Expect(pc.Status.ProvisionerRef.Name).To(Equal(clusterName)) + Expect(pc.Status.Resources).NotTo(BeNil()) Expect(pc.Status.Resources.SuperUserSecretRef).NotTo(BeNil()) Expect(pc.Status.Resources.ConfigMapRef).NotTo(BeNil()) + + received := make([]string, 0, 8) + Expect(containsEvents( + &received, fakeRecorder, + v1.EventTypeNormal, core.EventClusterReady, + )).To(BeTrue(), "events seen: %v", received) }) // PC-07 @@ -308,6 +372,7 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Context("with PostgreSQL metrics enabled in class", func() { BeforeEach(func() { pgCluster.Spec.Class = classNameMetrics + pgCluster.Spec.ManagedRoles = nil }) It("adds scrape annotations to the CNPG Cluster", func() { @@ -316,6 +381,17 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { cnpg := &cnpgv1.Cluster{} Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed()) + + cnpg.Status.Phase = cnpgv1.PhaseHealthy + cnpg.Status.ManagedRolesStatus = cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusReconciled: {"app_user", "app_user_rw"}, + }, + } + Expect(k8sClient.Status().Update(ctx, cnpg)).To(Succeed()) + + reconcileNTimes(1) + Expect(cnpg.Spec.InheritedMetadata).NotTo(BeNil()) Expect(cnpg.Spec.InheritedMetadata.Annotations).To(HaveKeyWithValue(scrapeAnnotationKey, "true")) 
Expect(cnpg.Spec.InheritedMetadata.Annotations).To(HaveKeyWithValue(pathAnnotationKey, metricsPath)) @@ -353,6 +429,7 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Context("with connection pooler metrics enabled in class", func() { BeforeEach(func() { pgCluster.Spec.Class = classNamePooler + pgCluster.Spec.ManagedRoles = nil }) It("adds scrape annotations to poolers only after the CNPG cluster becomes healthy", func() { @@ -439,7 +516,7 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { When("reconciling with invalid or drifted dependencies", func() { // PC-05 Context("when referenced class does not exist", func() { - It("fails with class-not-found condition", func() { + It("fails with class-not-found condition and emits a warning event", func() { badName := "bad-" + clusterName badKey := types.NamespacedName{Name: badName, Namespace: namespace} @@ -464,6 +541,12 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { cond := meta.FindStatusCondition(current.Status.Conditions, "ClusterReady") return cond != nil && cond.Reason == "ClusterClassNotFound" }, "20s", "250ms").Should(BeTrue()) + + received := make([]string, 0, 8) + Expect(containsEvents( + &received, fakeRecorder, + v1.EventTypeWarning, core.EventClusterClassNotFound, + )).To(BeTrue(), "events seen: %v", received) }) }) diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 8aefdea45..f0efc1926 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -20,10 +20,13 @@ import ( "context" "errors" "fmt" + "sort" + "strings" cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" password "github.com/sethvargo/go-password/password" enterprisev4 "github.com/splunk/splunk-operator/api/v4" + pgcConstants "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core/types/constants" "github.com/splunk/splunk-operator/pkg/postgresql/shared/ports" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" @@ -67,8 +70,36 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
logger = logger.WithValues("postgresCluster", postgresCluster.Name) ctx = log.IntoContext(ctx, logger) + currentPhase := func() string { + if postgresCluster.Status.Phase == nil { + return "" + } + return *postgresCluster.Status.Phase + } + updateStatus := func(conditionType conditionTypes, status metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileClusterPhases) error { - return setStatus(ctx, c, rc.Metrics, postgresCluster, conditionType, status, reason, message, phase) + oldPhase := currentPhase() + if err := setStatus(ctx, c, rc.Metrics, postgresCluster, conditionType, status, reason, message, phase); err != nil { + return err + } + rc.emitClusterPhaseTransition(postgresCluster, oldPhase, currentPhase()) + return nil + } + updateComponentHealthStatus := func(health componentHealth) error { + oldPhase := currentPhase() + if err := setStatusFromHealth(ctx, c, rc.Metrics, postgresCluster, health); err != nil { + return err + } + rc.emitClusterPhaseTransition(postgresCluster, oldPhase, currentPhase()) + return nil + } + updatePhaseStatus := func(phase reconcileClusterPhases) error { + oldPhase := currentPhase() + if err := setPhaseStatus(ctx, c, postgresCluster, phase); err != nil { + return err + } + rc.emitClusterPhaseTransition(postgresCluster, oldPhase, currentPhase()) + return nil } // Finalizer handling must come before any other processing. @@ -119,8 +150,6 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. return ctrl.Result{}, errors.Join(err, statusErr) } - postgresMetricsEnabled := isPostgreSQLMetricsEnabled(postgresCluster, clusterClass) - poolerMetricsEnabled := isConnectionPoolerMetricsEnabled(postgresCluster, clusterClass) // Resolve or derive the superuser secret name. if postgresCluster.Status.Resources != nil && postgresCluster.Status.Resources.SuperUserSecretRef != nil { postgresSecretName = postgresCluster.Status.Resources.SuperUserSecretRef.Name @@ -130,302 +159,1131 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
logger.Info("Superuser secret name derived", "name", postgresSecretName) } - secretExists, secretErr := clusterSecretExists(ctx, c, postgresCluster.Namespace, postgresSecretName, secret) - if secretErr != nil { - logger.Error(secretErr, "Failed to check if PostgresCluster secret exists", "name", postgresSecretName) - rc.emitWarning(postgresCluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to check secret existence: %v", secretErr)) - statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonUserSecretFailed, - fmt.Sprintf("Failed to check secret existence: %v", secretErr), failedClusterPhase) - return ctrl.Result{}, errors.Join(secretErr, statusErr) + poolerEnabled = mergedConfig.Spec.ConnectionPoolerEnabled != nil && *mergedConfig.Spec.ConnectionPoolerEnabled + poolerConfigPresent := mergedConfig.CNPG != nil && mergedConfig.CNPG.ConnectionPooler != nil + + secretComponent := newSecretModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, postgresSecretName) + clusterComponent := newClusterModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, clusterClass, mergedConfig, postgresSecretName) + + bootstrapComponents := []component{ + secretComponent, + clusterComponent, } - if !secretExists { - logger.Info("Superuser secret creation started", "name", postgresSecretName) - if err := ensureClusterSecret(ctx, c, rc.Scheme, postgresCluster, postgresSecretName, secret); err != nil { - logger.Error(err, "Failed to ensure PostgresCluster secret", "name", postgresSecretName) - rc.emitWarning(postgresCluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to generate cluster secret: %v", err)) - statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonUserSecretFailed, - fmt.Sprintf("Failed to generate PostgresCluster secret: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) + + phase := func(component component) (ctrl.Result, error) { + componentLogger := logger.WithValues("component", component.Name()) + gate, gateErr := component.EvaluatePrerequisites(ctx) + if gateErr != nil { + if isTransientError(gateErr) { + componentLogger.Error(gateErr, "Component prerequisite transient error, requeueing", "step", "prerequisites") + return transientResult(gateErr), nil + } + componentLogger.Error(gateErr, "Component prerequisite evaluation failed", "step", "prerequisites") + return ctrl.Result{}, fmt.Errorf("%s prerequisites: %w", component.Name(), gateErr) } - if err := c.Status().Update(ctx, postgresCluster); err != nil { - logger.Error(err, "Failed to update status after secret creation") - return ctrl.Result{}, err + if !gate.Allowed { + componentLogger.Info("Component blocked by prerequisites", + "step", "prerequisites", + "condition", gate.Health.Condition, + "reason", gate.Health.Reason, + "phase", gate.Health.Phase, + "requeueAfter", gate.Health.Result.RequeueAfter) + health, err := component.Converge(ctx) + if err != nil && isTransientError(err) { + return transientResult(err), nil + } + if err != nil { + componentLogger.Error(err, "Blocked component convergence failed", "step", "converge") + return health.Result, fmt.Errorf("%s converge (blocked): %w", component.Name(), err) + } + return health.Result, nil } - rc.emitNormal(postgresCluster, EventSecretReady, fmt.Sprintf("Superuser secret %s created", postgresSecretName)) - logger.Info("Superuser secret ref persisted to status") - } - // Re-attach ownerRef if it was stripped (e.g. by a Retain-policy deletion of a previous cluster). 
- hasOwnerRef, ownerRefErr := controllerutil.HasOwnerReference(secret.GetOwnerReferences(), postgresCluster, rc.Scheme) - if ownerRefErr != nil { - logger.Error(ownerRefErr, "Failed to check owner reference on Secret") - return ctrl.Result{}, fmt.Errorf("failed to check owner reference on secret: %w", ownerRefErr) + if err := component.Actuate(ctx); err != nil { + if isTransientError(err) { + componentLogger.Error(err, "Component actuation transient error, requeueing", "step", "actuate") + return transientResult(err), nil + } + componentLogger.Error(err, "Component actuation failed", "step", "actuate") + return ctrl.Result{}, fmt.Errorf("%s actuate: %w", component.Name(), err) + } + componentLogger.Info("Component actuation completed", "step", "actuate") + + health, err := component.Converge(ctx) + if err != nil && isTransientError(err) { + componentLogger.Error(err, "Component convergence transient error, requeueing", "step", "converge") + return transientResult(err), nil + } + + if err != nil { + componentLogger.Error(err, "Component convergence failed", + "step", "converge", + "condition", health.Condition, + "reason", health.Reason, + "phase", health.Phase) + return health.Result, fmt.Errorf("%s converge: %w", component.Name(), err) + } + if isIntermediateState(health.State) { + componentLogger.Info("Component convergence pending", + "step", "converge", + "condition", health.Condition, + "reason", health.Reason, + "phase", health.Phase, + "requeueAfter", health.Result.RequeueAfter) + return health.Result, nil + } + componentLogger.Info("Component convergence ready", + "step", "converge", + "condition", health.Condition, + "reason", health.Reason, + "phase", health.Phase) + if health.Result != (ctrl.Result{}) { + componentLogger.Info("Component requested explicit result", + "step", "converge", + "requeueAfter", health.Result.RequeueAfter) + return health.Result, nil + } + return ctrl.Result{}, nil } - if secretExists && !hasOwnerRef { - logger.Info("Existing secret linked to PostgresCluster", "name", postgresSecretName) - rc.emitNormal(postgresCluster, EventClusterAdopted, fmt.Sprintf("Adopted existing CNPG cluster and secret %s", postgresSecretName)) - originalSecret := secret.DeepCopy() - if err := ctrl.SetControllerReference(postgresCluster, secret, rc.Scheme); err != nil { - return ctrl.Result{}, fmt.Errorf("failed to set controller reference on existing secret: %w", err) + + for _, component := range bootstrapComponents { + result, err := phase(component) + if err != nil { + return result, err } - if err := patchObject(ctx, c, originalSecret, secret, "Secret"); err != nil { - logger.Error(err, "Failed to patch existing secret with controller reference") - rc.emitWarning(postgresCluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to patch existing secret: %v", err)) - statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonSuperUserSecretFailed, - fmt.Sprintf("Failed to patch existing secret: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) + if result != (ctrl.Result{}) { + return result, nil } } - if postgresCluster.Status.Resources.SuperUserSecretRef == nil { - postgresCluster.Status.Resources.SuperUserSecretRef = &corev1.SecretKeySelector{ - LocalObjectReference: corev1.LocalObjectReference{Name: postgresSecretName}, - Key: secretKeyPassword, + cnpgCluster = clusterComponent.cnpgCluster + runtimeView := clusterRuntimeViewAdapter{model: clusterComponent} + runtimeComponents := []component{ + newManagedRolesModel(c, rc.Scheme, 
rc, updateComponentHealthStatus, runtimeView, postgresCluster, postgresSecretName), + newPoolerModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, clusterClass, mergedConfig, cnpgCluster, poolerEnabled, poolerConfigPresent), + newConfigMapModel(c, rc.Scheme, rc, updateComponentHealthStatus, runtimeView, postgresCluster, postgresSecretName), + } + + for _, component := range runtimeComponents { + result, err := phase(component) + if err != nil { + return result, err + } + if result != (ctrl.Result{}) { + return result, nil } } + logger.Info("Reconciliation complete") + if err := updatePhaseStatus(readyClusterPhase); err != nil { + if apierrors.IsConflict(err) { + return ctrl.Result{Requeue: true}, nil + } + return ctrl.Result{}, err + } + return ctrl.Result{}, nil +} + +func isTransientError(err error) bool { + return apierrors.IsConflict(err) || + apierrors.IsServerTimeout(err) || + apierrors.IsTooManyRequests(err) || + apierrors.IsTimeout(err) +} + +func transientResult(err error) ctrl.Result { + if apierrors.IsConflict(err) { + return ctrl.Result{Requeue: true} + } + return ctrl.Result{RequeueAfter: retryDelay} +} + +func writeComponentStatus(updateStatus healthStatusUpdater, health componentHealth) error { + if updateStatus == nil { + return nil + } + return updateStatus(health) +} + +// types/dto candidate +type componentHealth struct { + State pgcConstants.State + Condition conditionTypes + Reason conditionReasons + Message string + Phase reconcileClusterPhases + Result ctrl.Result +} + +type component interface { + Actuate(ctx context.Context) error + Converge(ctx context.Context) (componentHealth, error) + EvaluatePrerequisites(ctx context.Context) (prerequisiteDecision, error) + Name() string +} + +type prerequisiteDecision struct { + Allowed bool + Health componentHealth +} + +type healthStatusUpdater func(health componentHealth) error - // Build desired CNPG Cluster spec. - desiredSpec := buildCNPGClusterSpec(mergedConfig, postgresSecretName, postgresMetricsEnabled) +type eventEmitter interface { + emitNormal(obj client.Object, reason, message string) + emitWarning(obj client.Object, reason, message string) +} + +type poolerEmitter interface { + eventEmitter + emitPoolerReadyTransition(obj client.Object, conditions []metav1.Condition) + emitPoolerCreationTransition(obj client.Object, conditions []metav1.Condition) +} + +type clusterRuntimeView interface { + Cluster() *cnpgv1.Cluster + IsHealthy() bool +} + +type clusterRuntimeViewAdapter struct { + model *clusterModel +} + +func (v clusterRuntimeViewAdapter) Cluster() *cnpgv1.Cluster { + return v.model.cnpgCluster +} + +func (v clusterRuntimeViewAdapter) IsHealthy() bool { + return v.model.cnpgCluster != nil && v.model.cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy +} - // Fetch existing CNPG Cluster or create it. 
+type clusterModel struct { + client client.Client + scheme *runtime.Scheme + events eventEmitter + updateStatus healthStatusUpdater + cluster *enterprisev4.PostgresCluster + clusterClass *enterprisev4.PostgresClusterClass + mergedConfig *MergedConfig + secretName string + cnpgCluster *cnpgv1.Cluster + cnpgCreated bool + cnpgPatched bool + + metricsEnabled bool + health componentHealth +} + +func newClusterModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, cluster *enterprisev4.PostgresCluster, clusterClass *enterprisev4.PostgresClusterClass, mergedConfig *MergedConfig, secretName string) *clusterModel { + model := &clusterModel{ + client: c, scheme: scheme, + events: events, updateStatus: updateStatus, + cluster: cluster, clusterClass: clusterClass, mergedConfig: mergedConfig, + secretName: secretName, + } + model.metricsEnabled = isPostgreSQLMetricsEnabled(cluster, clusterClass) + return model +} + +func (p *clusterModel) Name() string { return pgcConstants.ComponentProvisioner } + +func (p *clusterModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { + if health, missing := p.getHealthOnMissingSecretRef(); missing { + return prerequisiteDecision{ + Allowed: false, + Health: health, + }, nil + } + return prerequisiteDecision{Allowed: true}, nil +} + +func (p *clusterModel) Actuate(ctx context.Context) error { + p.cnpgCreated = false + p.cnpgPatched = false + + desiredSpec := buildCNPGClusterSpec(p.mergedConfig, p.secretName, p.metricsEnabled) existingCNPG := &cnpgv1.Cluster{} - err = c.Get(ctx, types.NamespacedName{Name: postgresCluster.Name, Namespace: postgresCluster.Namespace}, existingCNPG) + err := p.client.Get(ctx, types.NamespacedName{Name: p.cluster.Name, Namespace: p.cluster.Namespace}, existingCNPG) switch { case apierrors.IsNotFound(err): - logger.Info("CNPG Cluster creation started", "name", postgresCluster.Name) - newCluster, err := buildCNPGCluster(rc.Scheme, postgresCluster, mergedConfig, postgresSecretName, postgresMetricsEnabled) + newCluster, err := buildCNPGCluster(p.scheme, p.cluster, p.mergedConfig, p.secretName, p.metricsEnabled) if err != nil { - logger.Error(err, "Failed to build CNPG Cluster", "name", postgresCluster.Name) - return ctrl.Result{}, err - } - if err := c.Create(ctx, newCluster); err != nil { - logger.Error(err, "Failed to create CNPG Cluster") - rc.emitWarning(postgresCluster, EventClusterCreateFailed, fmt.Sprintf("Failed to create CNPG cluster: %v", err)) - statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterBuildFailed, - fmt.Sprintf("Failed to create CNPG Cluster: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) - } - rc.emitNormal(postgresCluster, EventClusterCreationStarted, "CNPG cluster created, waiting for healthy state") - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterBuildSucceeded, - "CNPG Cluster created", pendingClusterPhase); statusErr != nil { - return ctrl.Result{}, statusErr - } - logger.Info("CNPG Cluster created, requeueing for status update", "name", postgresCluster.Name) - return ctrl.Result{RequeueAfter: retryDelay}, nil + p.events.emitWarning(p.cluster, EventClusterCreateFailed, fmt.Sprintf("Failed to build CNPG cluster: %v", err)) + return err + } + if err = p.client.Create(ctx, newCluster); err != nil { + p.events.emitWarning(p.cluster, EventClusterCreateFailed, fmt.Sprintf("Failed to create CNPG cluster: %v", err)) + return err + } + 
p.events.emitNormal(p.cluster, EventClusterCreationStarted, "CNPG cluster created, waiting for healthy state") + p.cnpgCluster = newCluster + p.cnpgCreated = true case err != nil: - logger.Error(err, "Failed to get CNPG Cluster") - statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterGetFailed, - fmt.Sprintf("Failed to get CNPG Cluster: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) + return err + default: + p.cnpgCluster = existingCNPG + currentNormalized := normalizeCNPGClusterSpec(p.cnpgCluster.Spec, p.mergedConfig.Spec.PostgreSQLConfig) + desiredNormalized := normalizeCNPGClusterSpec(desiredSpec, p.mergedConfig.Spec.PostgreSQLConfig) + if !equality.Semantic.DeepEqual(currentNormalized, desiredNormalized) { + originalCluster := p.cnpgCluster.DeepCopy() + p.cnpgCluster.Spec = desiredSpec + if patchErr := patchObject(ctx, p.client, originalCluster, p.cnpgCluster, "CNPGCluster"); patchErr != nil { + p.events.emitWarning(p.cluster, EventClusterUpdateFailed, fmt.Sprintf("Failed to patch CNPG cluster: %v", patchErr)) + return patchErr + } + p.events.emitNormal(p.cluster, EventClusterUpdateStarted, "CNPG cluster spec updated, waiting for healthy state") + p.cnpgPatched = true + } } - // Patch CNPG Cluster spec if drift detected. - cnpgCluster = existingCNPG - currentNormalized := normalizeCNPGClusterSpec(cnpgCluster.Spec, mergedConfig.Spec.PostgreSQLConfig) - desiredNormalized := normalizeCNPGClusterSpec(desiredSpec, mergedConfig.Spec.PostgreSQLConfig) - - if !equality.Semantic.DeepEqual(currentNormalized, desiredNormalized) { - logger.Info("CNPG Cluster spec drift detected, patch started", "name", cnpgCluster.Name) - originalCluster := cnpgCluster.DeepCopy() - cnpgCluster.Spec = desiredSpec - - switch patchErr := patchObject(ctx, c, originalCluster, cnpgCluster, "CNPGCluster"); { - case patchErr != nil: - logger.Error(patchErr, "Failed to patch CNPG Cluster", "name", cnpgCluster.Name) - rc.emitWarning(postgresCluster, EventClusterUpdateFailed, fmt.Sprintf("Failed to patch CNPG cluster: %v", patchErr)) - statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterPatchFailed, - fmt.Sprintf("Failed to patch CNPG Cluster: %v", patchErr), failedClusterPhase) - return ctrl.Result{}, errors.Join(patchErr, statusErr) - default: - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterBuildSucceeded, - "CNPG Cluster spec updated, waiting for healthy state", provisioningClusterPhase); statusErr != nil { - return ctrl.Result{}, statusErr + if p.cnpgCluster != nil { + p.cluster.Status.ProvisionerRef = &corev1.ObjectReference{ + APIVersion: "postgresql.cnpg.io/v1", + Kind: "Cluster", + Namespace: p.cnpgCluster.Namespace, + Name: p.cnpgCluster.Name, + UID: p.cnpgCluster.UID, + } + } + return nil +} + +func (p *clusterModel) Converge(_ context.Context) (health componentHealth, err error) { + p.health.Condition = clusterReady + defer func() { + statusErr := writeComponentStatus(p.updateStatus, p.health) + if statusErr != nil { + if err != nil { + err = errors.Join(err, statusErr) + } else { + err = statusErr } - rc.emitNormal(postgresCluster, EventClusterUpdateStarted, "CNPG cluster spec updated, waiting for healthy state") - logger.Info("CNPG Cluster patched, requeueing for status update", "name", cnpgCluster.Name) - return ctrl.Result{RequeueAfter: retryDelay}, nil } + health = p.health + }() + + if missingHealth, missing := p.getHealthOnMissingSecretRef(); missing { + p.health = missingHealth + return 
p.health, nil } - // Reconcile ManagedRoles. - if err := reconcileManagedRoles(ctx, c, postgresCluster, cnpgCluster); err != nil { - logger.Error(err, "Failed to reconcile managed roles") - rc.emitWarning(postgresCluster, EventManagedRolesFailed, fmt.Sprintf("Failed to reconcile managed roles: %v", err)) - statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonManagedRolesFailed, - fmt.Sprintf("Failed to reconcile managed roles: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) + if p.cnpgCluster == nil { + p.health.State = pgcConstants.Pending + p.health.Reason = reasonCNPGProvisioning + p.health.Message = msgCNPGPendingCreation + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil } - // Reconcile Connection Pooler. - poolerEnabled = mergedConfig.Spec.ConnectionPoolerEnabled != nil && *mergedConfig.Spec.ConnectionPoolerEnabled + if p.cnpgCreated { + p.health.State = pgcConstants.Pending + p.health.Reason = reasonCNPGProvisioning + p.health.Message = msgCNPGPendingCreation + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + } + + if p.cnpgPatched { + p.health.State = pgcConstants.Provisioning + p.health.Reason = reasonCNPGProvisioning + p.health.Message = fmt.Sprintf(msgFmtCNPGClusterPhase, p.cnpgCluster.Status.Phase) + p.health.Phase = provisioningClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + } + + switch p.cnpgCluster.Status.Phase { + case cnpgv1.PhaseHealthy: + p.health.State = pgcConstants.Ready + p.health.Reason = reasonCNPGClusterHealthy + p.health.Message = msgProvisionerHealthy + p.health.Phase = readyClusterPhase + p.health.Result = ctrl.Result{} + return p.health, nil + case cnpgv1.PhaseFirstPrimary, cnpgv1.PhaseCreatingReplica, cnpgv1.PhaseWaitingForInstancesToBeActive: + p.health.State = pgcConstants.Provisioning + p.health.Reason = reasonCNPGProvisioning + p.health.Message = fmt.Sprintf(msgFmtCNPGProvisioning, p.cnpgCluster.Status.Phase) + p.health.Phase = provisioningClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + case cnpgv1.PhaseSwitchover: + p.health.State = pgcConstants.Configuring + p.health.Reason = reasonCNPGSwitchover + p.health.Message = msgCNPGSwitchover + p.health.Phase = configuringClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + case cnpgv1.PhaseFailOver: + p.health.State = pgcConstants.Configuring + p.health.Reason = reasonCNPGFailingOver + p.health.Message = msgCNPGFailingOver + p.health.Phase = configuringClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + case cnpgv1.PhaseInplacePrimaryRestart, cnpgv1.PhaseInplaceDeletePrimaryRestart: + p.health.State = pgcConstants.Configuring + p.health.Reason = reasonCNPGRestarting + p.health.Message = fmt.Sprintf(msgFmtCNPGRestarting, p.cnpgCluster.Status.Phase) + p.health.Phase = configuringClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + case cnpgv1.PhaseUpgrade, cnpgv1.PhaseMajorUpgrade, cnpgv1.PhaseUpgradeDelayed, cnpgv1.PhaseOnlineUpgrading: + p.health.State = pgcConstants.Configuring + p.health.Reason = reasonCNPGUpgrading + p.health.Message = fmt.Sprintf(msgFmtCNPGUpgrading, p.cnpgCluster.Status.Phase) + p.health.Phase = configuringClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + 
return p.health, nil + case cnpgv1.PhaseApplyingConfiguration: + p.health.State = pgcConstants.Configuring + p.health.Reason = reasonCNPGApplyingConfig + p.health.Message = msgCNPGApplyingConfiguration + p.health.Phase = configuringClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + case cnpgv1.PhaseReplicaClusterPromotion: + p.health.State = pgcConstants.Configuring + p.health.Reason = reasonCNPGPromoting + p.health.Message = msgCNPGPromoting + p.health.Phase = configuringClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + case cnpgv1.PhaseWaitingForUser: + p.health.State = pgcConstants.Failed + p.health.Reason = reasonCNPGWaitingForUser + p.health.Message = msgCNPGWaitingForUser + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, fmt.Errorf("provisioner requires user action") + case cnpgv1.PhaseUnrecoverable: + p.health.State = pgcConstants.Failed + p.health.Reason = reasonCNPGUnrecoverable + p.health.Message = msgCNPGUnrecoverable + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, fmt.Errorf("provisioner unrecoverable") + case cnpgv1.PhaseCannotCreateClusterObjects: + p.health.State = pgcConstants.Failed + p.health.Reason = reasonCNPGProvisioningFailed + p.health.Message = msgCNPGCannotCreateObjects + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, fmt.Errorf("provisioner cannot create cluster objects") + case cnpgv1.PhaseUnknownPlugin, cnpgv1.PhaseFailurePlugin: + p.health.State = pgcConstants.Failed + p.health.Reason = reasonCNPGPluginError + p.health.Message = fmt.Sprintf(msgFmtCNPGPluginError, p.cnpgCluster.Status.Phase) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, fmt.Errorf("provisioner plugin error") + case cnpgv1.PhaseImageCatalogError, cnpgv1.PhaseArchitectureBinaryMissing: + p.health.State = pgcConstants.Failed + p.health.Reason = reasonCNPGImageError + p.health.Message = fmt.Sprintf(msgFmtCNPGImageError, p.cnpgCluster.Status.Phase) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, fmt.Errorf("provisioner image error") + case "": + p.health.State = pgcConstants.Pending + p.health.Reason = reasonCNPGProvisioning + p.health.Message = msgCNPGPendingCreation + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + default: + p.health.State = pgcConstants.Provisioning + p.health.Reason = reasonCNPGProvisioning + p.health.Message = fmt.Sprintf(msgFmtCNPGClusterPhase, p.cnpgCluster.Status.Phase) + p.health.Phase = provisioningClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + } +} + +func (p *clusterModel) getHealthOnMissingSecretRef() (componentHealth, bool) { + if p.cluster.Status.Resources == nil || p.cluster.Status.Resources.SuperUserSecretRef == nil { + return componentHealth{ + State: pgcConstants.Pending, + Condition: clusterReady, + Reason: reasonUserSecretPending, + Message: msgSecretRefNotPublished, + Phase: pendingClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, true + } + return componentHealth{}, false +} + +type managedRolesModel struct { + client client.Client + scheme *runtime.Scheme + events eventEmitter + updateStatus healthStatusUpdater + runtime clusterRuntimeView + cluster *enterprisev4.PostgresCluster + secret string + + health componentHealth +} 
+ +func newManagedRolesModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, runtime clusterRuntimeView, cluster *enterprisev4.PostgresCluster, secret string) *managedRolesModel { + return &managedRolesModel{client: c, scheme: scheme, events: events, updateStatus: updateStatus, runtime: runtime, cluster: cluster, secret: secret} +} + +func (m *managedRolesModel) Name() string { return pgcConstants.ComponentManagedRoles } + +func (m *managedRolesModel) runtimeGateHealth() (componentHealth, bool) { + if m.runtime == nil || !m.runtime.IsHealthy() { + return componentHealth{ + State: pgcConstants.Pending, + Condition: managedRolesReady, + Reason: reasonManagedRolesPending, + Message: "Managed roles blocked until CNPG cluster is healthy", + Phase: pendingClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, true + } + return componentHealth{}, false +} + +func (m *managedRolesModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { + if gateHealth, blocked := m.runtimeGateHealth(); blocked { + return prerequisiteDecision{ + Allowed: false, + Health: gateHealth, + }, nil + } + return prerequisiteDecision{Allowed: true}, nil +} + +func (m *managedRolesModel) Actuate(ctx context.Context) error { + if rolesErr := reconcileManagedRoles(ctx, m.client, m.cluster, m.runtime.Cluster()); rolesErr != nil { + m.events.emitWarning(m.cluster, EventManagedRolesFailed, fmt.Sprintf("Failed to reconcile managed roles: %v", rolesErr)) + m.health.State = pgcConstants.Failed + m.health.Reason = reasonManagedRolesFailed + m.health.Message = fmt.Sprintf("Failed to reconcile managed roles: %v", rolesErr) + m.health.Phase = failedClusterPhase + m.health.Result = ctrl.Result{} + return rolesErr + } + return nil +} + +func (m *managedRolesModel) Converge(ctx context.Context) (health componentHealth, err error) { + _ = ctx + m.health = componentHealth{Condition: managedRolesReady} + defer func() { + statusErr := writeComponentStatus(m.updateStatus, m.health) + if statusErr != nil { + if err != nil { + err = errors.Join(err, statusErr) + } else { + err = statusErr + } + } + health = m.health + }() + + if gateHealth, blocked := m.runtimeGateHealth(); blocked { + m.health = gateHealth + return m.health, nil + } + + syncManagedRolesStatusFromCNPG(m.cluster, m.runtime.Cluster()) + status := m.cluster.Status.ManagedRolesStatus + if status == nil { + m.health.State = pgcConstants.Failed + m.health.Reason = reasonManagedRolesFailed + m.health.Message = "Managed roles status not published yet" + m.health.Phase = failedClusterPhase + m.health.Result = ctrl.Result{RequeueAfter: retryDelay} + m.emitManagedRolesConvergeFailure(m.health.Message) + return m.health, fmt.Errorf("managed roles status not published") + } + + if len(status.Failed) > 0 { + m.health.State = pgcConstants.Failed + m.health.Reason = reasonManagedRolesFailed + m.health.Message = fmt.Sprintf("Managed roles reconciliation failed for %d role(s)", len(status.Failed)) + m.health.Phase = failedClusterPhase + m.health.Result = ctrl.Result{RequeueAfter: retryDelay} + m.emitManagedRolesConvergeFailure(m.health.Message) + return m.health, fmt.Errorf("managed roles have failed entries") + } + + if len(status.Pending) > 0 { + m.health.State = pgcConstants.Pending + m.health.Reason = reasonManagedRolesPending + m.health.Message = fmt.Sprintf("Managed roles pending for %d role(s)", len(status.Pending)) + m.health.Phase = pendingClusterPhase + m.health.Result = ctrl.Result{RequeueAfter: 
retryDelay} + return m.health, nil + } + + m.health.State = pgcConstants.Ready + m.health.Reason = reasonManagedRolesReady + m.health.Message = "Managed roles are reconciled" + m.health.Phase = readyClusterPhase + m.health.Result = ctrl.Result{} + if !meta.IsStatusConditionTrue(m.cluster.Status.Conditions, string(managedRolesReady)) { + m.events.emitNormal(m.cluster, EventManagedRolesReady, m.health.Message) + } + return m.health, nil +} + +func (m *managedRolesModel) emitManagedRolesConvergeFailure(message string) { + cond := meta.FindStatusCondition(m.cluster.Status.Conditions, string(managedRolesReady)) + if cond != nil && + cond.Status == metav1.ConditionFalse && + cond.Reason == string(reasonManagedRolesFailed) && + cond.Message == message { + return + } + m.events.emitWarning(m.cluster, EventManagedRolesFailed, message) +} + +// TODO: Ports as access to cnpg originated info to decouple. +func syncManagedRolesStatusFromCNPG(cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster) { + if cluster == nil || cnpgCluster == nil { + return + } + + expectedRoles := make([]string, 0, len(cluster.Spec.ManagedRoles)) + for _, role := range cluster.Spec.ManagedRoles { + expectedRoles = append(expectedRoles, role.Name) + } + + cnpgStatus := cnpgCluster.Status.ManagedRolesStatus + reconciled := append([]string(nil), cnpgStatus.ByStatus[cnpgv1.RoleStatusReconciled]...) + pending := append([]string(nil), cnpgStatus.ByStatus[cnpgv1.RoleStatusPendingReconciliation]...) + + reconciledSet := make(map[string]struct{}, len(reconciled)) + for _, roleName := range reconciled { + reconciledSet[roleName] = struct{}{} + } + pendingSet := make(map[string]struct{}, len(pending)) + for _, roleName := range pending { + pendingSet[roleName] = struct{}{} + } + + failed := make(map[string]string, len(cnpgStatus.CannotReconcile)) + for roleName, errs := range cnpgStatus.CannotReconcile { + if len(errs) == 0 { + failed[roleName] = "role cannot be reconciled" + continue + } + failed[roleName] = strings.Join(errs, "; ") + } + + for _, roleName := range expectedRoles { + if _, ok := reconciledSet[roleName]; ok { + continue + } + if _, ok := failed[roleName]; ok { + continue + } + if _, ok := pendingSet[roleName]; ok { + continue + } + pending = append(pending, roleName) + } + + sort.Strings(reconciled) + sort.Strings(pending) + if len(failed) == 0 { + failed = nil + } - rwPoolerExists, err := poolerExists(ctx, c, postgresCluster, readWriteEndpoint) + cluster.Status.ManagedRolesStatus = &enterprisev4.ManagedRolesStatus{ + Reconciled: reconciled, + Pending: pending, + Failed: failed, + } +} + +type poolerModel struct { + client client.Client + scheme *runtime.Scheme + events poolerEmitter + updateStatus healthStatusUpdater + cluster *enterprisev4.PostgresCluster + clusterClass *enterprisev4.PostgresClusterClass + mergedConfig *MergedConfig + cnpgCluster *cnpgv1.Cluster + poolerEnabled bool + poolerConfigPresent bool + + metricsEnabled bool + health componentHealth +} + +func newPoolerModel(c client.Client, scheme *runtime.Scheme, events poolerEmitter, updateStatus healthStatusUpdater, cluster *enterprisev4.PostgresCluster, clusterClass *enterprisev4.PostgresClusterClass, mergedConfig *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerEnabled bool, poolerConfigPresent bool) *poolerModel { + model := &poolerModel{ + client: c, + scheme: scheme, + events: events, + updateStatus: updateStatus, + cluster: cluster, + clusterClass: clusterClass, + mergedConfig: mergedConfig, + cnpgCluster: cnpgCluster, + poolerEnabled: 
poolerEnabled, + poolerConfigPresent: poolerConfigPresent, + } + model.metricsEnabled = isConnectionPoolerMetricsEnabled(cluster, clusterClass) + return model +} + +func (p *poolerModel) Name() string { return pgcConstants.ComponentPooler } + +func (p *poolerModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { + if !p.poolerEnabled || !p.poolerConfigPresent { + return prerequisiteDecision{Allowed: true}, nil + } + if p.cnpgCluster == nil { + return prerequisiteDecision{ + Allowed: false, + Health: componentHealth{ + State: pgcConstants.Pending, + Condition: poolerReady, + Reason: reasonCNPGProvisioning, + Message: msgCNPGPendingCreation, + Phase: pendingClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, + }, nil + } + if p.cnpgCluster.Status.Phase != cnpgv1.PhaseHealthy { + return prerequisiteDecision{ + Allowed: false, + Health: componentHealth{ + State: pgcConstants.Provisioning, + Condition: poolerReady, + Reason: reasonCNPGProvisioning, + Message: fmt.Sprintf(msgFmtCNPGClusterPhase, p.cnpgCluster.Status.Phase), + Phase: provisioningClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, + }, nil + } + return prerequisiteDecision{Allowed: true}, nil +} + +func (p *poolerModel) Actuate(ctx context.Context) error { + switch { + case !p.poolerEnabled: + if err := deleteConnectionPoolers(ctx, p.client, p.cluster); err != nil { + return err + } + p.cluster.Status.ConnectionPoolerStatus = nil + meta.RemoveStatusCondition(&p.cluster.Status.Conditions, string(poolerReady)) + return nil + case !p.poolerConfigPresent: + return nil + case p.cnpgCluster == nil || p.cnpgCluster.Status.Phase != cnpgv1.PhaseHealthy: + return nil + default: + if err := createOrUpdateConnectionPoolers(ctx, p.client, p.scheme, p.cluster, p.mergedConfig, p.cnpgCluster, p.metricsEnabled); err != nil { + p.events.emitWarning(p.cluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to reconcile connection pooler: %v", err)) + return err + } + return nil + } +} + +func (p *poolerModel) Converge(ctx context.Context) (health componentHealth, err error) { + p.health = componentHealth{Condition: poolerReady} + defer func() { + statusErr := writeComponentStatus(p.updateStatus, p.health) + if statusErr != nil { + if err != nil { + err = errors.Join(err, statusErr) + } else { + err = statusErr + } + } + health = p.health + }() + + if !p.poolerEnabled { + p.health.State = pgcConstants.Ready + p.health.Reason = reasonAllInstancesReady + p.health.Message = msgPoolerDisabled + p.health.Phase = readyClusterPhase + p.health.Result = ctrl.Result{} + return p.health, nil + } + if !p.poolerConfigPresent { + p.health.State = pgcConstants.Failed + p.health.Reason = reasonPoolerConfigMissing + p.health.Message = msgPoolerConfigMissing + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, fmt.Errorf("pooler config missing") + } + if p.cnpgCluster == nil { + p.health.State = pgcConstants.Pending + p.health.Reason = reasonCNPGProvisioning + p.health.Message = msgCNPGPendingCreation + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + } + if p.cnpgCluster.Status.Phase != cnpgv1.PhaseHealthy { + p.health.State = pgcConstants.Provisioning + p.health.Reason = reasonCNPGProvisioning + p.health.Message = fmt.Sprintf(msgFmtCNPGClusterPhase, p.cnpgCluster.Status.Phase) + p.health.Phase = provisioningClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + 
} + + // TODO: Port material. + rwExists, err := poolerExists(ctx, p.client, p.cluster, readWriteEndpoint) if err != nil { - logger.Error(err, "Failed to check RW pooler existence") - statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to check pooler existence: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) + p.events.emitWarning(p.cluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to sync pooler status: %v", err)) + p.health.State = pgcConstants.Failed + p.health.Reason = reasonPoolerReconciliationFailed + p.health.Message = fmt.Sprintf("Failed to check RW pooler existence: %v", err) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, err + } + roExists, err := poolerExists(ctx, p.client, p.cluster, readOnlyEndpoint) + if err != nil { + p.events.emitWarning(p.cluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to sync pooler status: %v", err)) + p.health.State = pgcConstants.Failed + p.health.Reason = reasonPoolerReconciliationFailed + p.health.Message = fmt.Sprintf("Failed to check RO pooler existence: %v", err) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, err + } + if !rwExists || !roExists { + p.events.emitPoolerCreationTransition(p.cluster, p.cluster.Status.Conditions) + p.health.State = pgcConstants.Provisioning + p.health.Reason = reasonPoolerCreating + p.health.Message = msgPoolersProvisioning + p.health.Phase = provisioningClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + } + + rwPooler := &cnpgv1.Pooler{} + if err := p.client.Get(ctx, types.NamespacedName{ + Name: poolerResourceName(p.cluster.Name, readWriteEndpoint), + Namespace: p.cluster.Namespace, + }, rwPooler); err != nil { + p.events.emitPoolerCreationTransition(p.cluster, p.cluster.Status.Conditions) + p.health.State = pgcConstants.Pending + p.health.Reason = reasonPoolerCreating + p.health.Message = msgWaitRWPoolerObject + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil } - roPoolerExists, err := poolerExists(ctx, c, postgresCluster, readOnlyEndpoint) + roPooler := &cnpgv1.Pooler{} + if err := p.client.Get(ctx, types.NamespacedName{ + Name: poolerResourceName(p.cluster.Name, readOnlyEndpoint), + Namespace: p.cluster.Namespace, + }, roPooler); err != nil { + p.events.emitPoolerCreationTransition(p.cluster, p.cluster.Status.Conditions) + p.health.State = pgcConstants.Pending + p.health.Reason = reasonPoolerCreating + p.health.Message = msgWaitROPoolerObject + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + } + if !arePoolersReady(rwPooler, roPooler) { + p.events.emitPoolerCreationTransition(p.cluster, p.cluster.Status.Conditions) + p.health.State = pgcConstants.Pending + p.health.Reason = reasonPoolerCreating + p.health.Message = msgPoolersNotReady + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + } + + p.cluster.Status.ConnectionPoolerStatus = &enterprisev4.ConnectionPoolerStatus{Enabled: true} + p.health.State = pgcConstants.Ready + p.health.Reason = reasonAllInstancesReady + p.health.Message = msgPoolersReady + p.health.Phase = readyClusterPhase + p.health.Result = ctrl.Result{} + p.events.emitPoolerReadyTransition(p.cluster, p.cluster.Status.Conditions) + return 
p.health, nil +} + +type configMapModel struct { + client client.Client + scheme *runtime.Scheme + events eventEmitter + updateStatus healthStatusUpdater + runtime clusterRuntimeView + cluster *enterprisev4.PostgresCluster + secret string + + health componentHealth +} + +func newConfigMapModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, runtime clusterRuntimeView, cluster *enterprisev4.PostgresCluster, secret string) *configMapModel { + return &configMapModel{client: c, scheme: scheme, events: events, updateStatus: updateStatus, runtime: runtime, cluster: cluster, secret: secret} +} + +func (c *configMapModel) Name() string { return pgcConstants.ComponentConfigMap } + +func (c *configMapModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { + return prerequisiteDecision{Allowed: true}, nil +} + +func (c *configMapModel) Actuate(ctx context.Context) error { + cnpgCluster := c.runtime.Cluster() + if cnpgCluster == nil { + return nil + } + desiredCM, err := generateConfigMap(ctx, c.client, c.scheme, c.cluster, cnpgCluster, c.secret) if err != nil { - logger.Error(err, "Failed to check RO pooler existence") - statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to check pooler existence: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) + c.events.emitWarning(c.cluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) + return err + } + cm := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: desiredCM.Name, Namespace: desiredCM.Namespace}} + _, err = controllerutil.CreateOrUpdate(ctx, c.client, cm, func() error { + cm.Data = desiredCM.Data + cm.Annotations = desiredCM.Annotations + cm.Labels = desiredCM.Labels + if !metav1.IsControlledBy(cm, c.cluster) { + if setErr := ctrl.SetControllerReference(c.cluster, cm, c.scheme); setErr != nil { + return fmt.Errorf("setting controller reference: %w", setErr) + } + } + return nil + }) + if err != nil { + c.events.emitWarning(c.cluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) + return err } + if c.cluster.Status.Resources.ConfigMapRef == nil { + c.cluster.Status.Resources.ConfigMapRef = &corev1.LocalObjectReference{Name: desiredCM.Name} + } + return nil +} - switch { - case !poolerEnabled: - if err := deleteConnectionPoolers(ctx, c, postgresCluster); err != nil { - logger.Error(err, "Failed to delete connection poolers") - statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to delete connection poolers: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) - } - postgresCluster.Status.ConnectionPoolerStatus = nil - meta.RemoveStatusCondition(&postgresCluster.Status.Conditions, string(poolerReady)) - - case !rwPoolerExists || !roPoolerExists: - if mergedConfig.CNPG == nil || mergedConfig.CNPG.ConnectionPooler == nil { - logger.Info("Connection pooler enabled but no config found in class or cluster spec, skipping", - "class", postgresCluster.Spec.Class, "cluster", postgresCluster.Name) - statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerConfigMissing, - fmt.Sprintf("Connection pooler is enabled but no config found in class %q or cluster %q", - postgresCluster.Spec.Class, postgresCluster.Name), failedClusterPhase) - return ctrl.Result{}, statusErr - } - if cnpgCluster.Status.Phase 
!= cnpgv1.PhaseHealthy { - logger.Info("CNPG Cluster not healthy yet, pending pooler creation", "clusterPhase", cnpgCluster.Status.Phase) - statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonCNPGClusterNotHealthy, - "Waiting for CNPG cluster to become healthy before creating poolers", pendingClusterPhase) - return ctrl.Result{RequeueAfter: retryDelay}, statusErr - } - if err := createOrUpdateConnectionPoolers(ctx, c, rc.Scheme, postgresCluster, mergedConfig, cnpgCluster, poolerMetricsEnabled); err != nil { - logger.Error(err, "Failed to reconcile connection pooler") - rc.emitWarning(postgresCluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to reconcile connection pooler: %v", err)) - statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to reconcile connection pooler: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) - } - rc.emitNormal(postgresCluster, EventPoolerCreationStarted, "Connection poolers created, waiting for readiness") - logger.Info("Connection pooler creation started, requeueing") - if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerCreating, - "Connection poolers are being provisioned", provisioningClusterPhase); statusErr != nil { - return ctrl.Result{}, statusErr - } - return ctrl.Result{RequeueAfter: retryDelay}, nil - - case func() bool { - rwPooler := &cnpgv1.Pooler{} - rwErr := c.Get(ctx, types.NamespacedName{ - Name: poolerResourceName(postgresCluster.Name, readWriteEndpoint), - Namespace: postgresCluster.Namespace, - }, rwPooler) - roPooler := &cnpgv1.Pooler{} - roErr := c.Get(ctx, types.NamespacedName{ - Name: poolerResourceName(postgresCluster.Name, readOnlyEndpoint), - Namespace: postgresCluster.Namespace, - }, roPooler) - return rwErr != nil || roErr != nil || !arePoolersReady(rwPooler, roPooler) - }(): - logger.Info("Connection Poolers are not ready yet, requeueing") - statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerCreating, - "Connection poolers are being provisioned", pendingClusterPhase) - return ctrl.Result{RequeueAfter: retryDelay}, statusErr +func (c *configMapModel) Converge(ctx context.Context) (health componentHealth, err error) { + c.health = componentHealth{Condition: configMapsReady} + defer func() { + statusErr := writeComponentStatus(c.updateStatus, c.health) + if statusErr != nil { + if err != nil { + err = errors.Join(err, statusErr) + } else { + err = statusErr + } + } + health = c.health + }() + + if c.runtime == nil || !c.runtime.IsHealthy() { + c.health.State = pgcConstants.Provisioning + c.health.Reason = reasonCNPGProvisioning + c.health.Message = msgCNPGPendingCreation + c.health.Phase = provisioningClusterPhase + c.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return c.health, nil + } + + if c.cluster.Status.Resources == nil || c.cluster.Status.Resources.ConfigMapRef == nil { + c.health.State = pgcConstants.Provisioning + c.health.Reason = reasonConfigMapFailed + c.health.Message = msgConfigMapRefNotPublished + c.health.Phase = provisioningClusterPhase + c.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return c.health, nil + } + + cm := &corev1.ConfigMap{} + key := types.NamespacedName{Name: c.cluster.Status.Resources.ConfigMapRef.Name, Namespace: c.cluster.Namespace} + if err := c.client.Get(ctx, key, cm); err != nil { + if apierrors.IsNotFound(err) { + c.health.State = pgcConstants.Provisioning + c.health.Reason = reasonConfigMapFailed + 
c.health.Message = msgConfigMapNotFoundYet + c.health.Phase = provisioningClusterPhase + c.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return c.health, nil + } + c.health.State = pgcConstants.Failed + c.health.Reason = reasonConfigMapFailed + c.health.Message = fmt.Sprintf("Failed to fetch ConfigMap: %v", err) + c.health.Phase = failedClusterPhase + c.health.Result = ctrl.Result{} + return c.health, err + } + + requiredKeys := []string{ + configKeyClusterRWEndpoint, + configKeyClusterROEndpoint, + configKeyClusterREndpoint, + configKeyDefaultClusterPort, + configKeySuperUserSecretRef, + } + for _, requiredKey := range requiredKeys { + if _, ok := cm.Data[requiredKey]; !ok { + c.health.State = pgcConstants.Failed + c.health.Reason = reasonConfigMapFailed + c.health.Message = fmt.Sprintf(msgFmtConfigMapMissingRequiredKey, requiredKey) + c.health.Phase = failedClusterPhase + c.health.Result = ctrl.Result{} + return c.health, fmt.Errorf("configmap missing key %s", requiredKey) + } + } - default: - oldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions)) - copy(oldConditions, postgresCluster.Status.Conditions) - if err := syncPoolerStatus(ctx, c, rc.Metrics, postgresCluster); err != nil { - logger.Error(err, "Failed to sync pooler status") - rc.emitWarning(postgresCluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to sync pooler status: %v", err)) - statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to sync pooler status: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) - } - rc.emitPoolerReadyTransition(postgresCluster, oldConditions) - } - - // Reconcile ConfigMap when CNPG cluster is healthy. - if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy { - logger.Info("CNPG Cluster healthy, reconciling ConfigMap") - desiredCM, err := generateConfigMap(ctx, c, rc.Scheme, postgresCluster, cnpgCluster, postgresSecretName) - if err != nil { - logger.Error(err, "Failed to generate ConfigMap") - rc.emitWarning(postgresCluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) - statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonConfigMapFailed, - fmt.Sprintf("Failed to generate ConfigMap: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) - } - cm := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: desiredCM.Name, Namespace: desiredCM.Namespace}} - createOrUpdateResult, err := controllerutil.CreateOrUpdate(ctx, c, cm, func() error { - cm.Data = desiredCM.Data - cm.Annotations = desiredCM.Annotations - cm.Labels = desiredCM.Labels - if !metav1.IsControlledBy(cm, postgresCluster) { - if err := ctrl.SetControllerReference(postgresCluster, cm, rc.Scheme); err != nil { - return fmt.Errorf("setting controller reference: %w", err) - } + c.health.State = pgcConstants.Ready + c.health.Reason = reasonConfigMapReady + c.health.Message = msgAccessConfigMapReady + c.health.Phase = readyClusterPhase + c.health.Result = ctrl.Result{} + if !meta.IsStatusConditionTrue(c.cluster.Status.Conditions, string(configMapsReady)) { + c.events.emitNormal(c.cluster, EventConfigMapReady, c.health.Message) + } + return c.health, nil +} + +type secretModel struct { + client client.Client + scheme *runtime.Scheme + events eventEmitter + updateStatus healthStatusUpdater + cluster *enterprisev4.PostgresCluster + name string + + health componentHealth +} + +func newSecretModel(c client.Client, scheme 
*runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, cluster *enterprisev4.PostgresCluster, name string) *secretModel { + return &secretModel{client: c, scheme: scheme, events: events, updateStatus: updateStatus, cluster: cluster, name: name} +} + +func (s *secretModel) Name() string { return pgcConstants.ComponentSecret } + +func (s *secretModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { + return prerequisiteDecision{Allowed: true}, nil +} + +func (s *secretModel) Actuate(ctx context.Context) error { + secret := &corev1.Secret{} + secretExists, secretErr := clusterSecretExists(ctx, s.client, s.cluster.Namespace, s.name, secret) + if secretErr != nil { + s.events.emitWarning(s.cluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to check secret existence: %v", secretErr)) + return secretErr + } + if !secretExists { + if err := ensureClusterSecret(ctx, s.client, s.scheme, s.cluster, s.name, secret); err != nil { + s.events.emitWarning(s.cluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to generate cluster secret: %v", err)) + return err + } + } + hasOwnerRef, ownerRefErr := controllerutil.HasOwnerReference(secret.GetOwnerReferences(), s.cluster, s.scheme) + if ownerRefErr != nil { + return fmt.Errorf("failed to check owner reference on secret: %w", ownerRefErr) + } + if secretExists && !hasOwnerRef { + originalSecret := secret.DeepCopy() + if err := ctrl.SetControllerReference(s.cluster, secret, s.scheme); err != nil { + return fmt.Errorf("failed to set controller reference on existing secret: %w", err) + } + if err := patchObject(ctx, s.client, originalSecret, secret, "Secret"); err != nil { + s.events.emitWarning(s.cluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to patch existing secret: %v", err)) + return err + } + s.events.emitNormal(s.cluster, EventClusterAdopted, fmt.Sprintf("Adopted existing CNPG cluster and secret %s", s.name)) + } + if s.cluster.Status.Resources.SuperUserSecretRef == nil { + s.cluster.Status.Resources.SuperUserSecretRef = &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: s.name}, + Key: secretKeyPassword, + } + } + return nil +} + +func (s *secretModel) Converge(ctx context.Context) (health componentHealth, err error) { + s.health = componentHealth{Condition: secretsReady} + defer func() { + statusErr := writeComponentStatus(s.updateStatus, s.health) + if statusErr != nil { + if err != nil { + err = errors.Join(err, statusErr) + } else { + err = statusErr } - return nil - }) - if err != nil { - logger.Error(err, "Failed to reconcile ConfigMap", "name", desiredCM.Name) - rc.emitWarning(postgresCluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) - statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonConfigMapFailed, - fmt.Sprintf("Failed to reconcile ConfigMap: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) - } - switch createOrUpdateResult { - case controllerutil.OperationResultCreated: - rc.emitNormal(postgresCluster, EventConfigMapReady, fmt.Sprintf("ConfigMap %s created", desiredCM.Name)) - logger.Info("ConfigMap created", "name", desiredCM.Name) - case controllerutil.OperationResultUpdated: - rc.emitNormal(postgresCluster, EventConfigMapReady, fmt.Sprintf("ConfigMap %s updated", desiredCM.Name)) - logger.Info("ConfigMap updated", "name", desiredCM.Name) - default: - logger.Info("ConfigMap unchanged", "name", desiredCM.Name) - } - if 
postgresCluster.Status.Resources.ConfigMapRef == nil { - postgresCluster.Status.Resources.ConfigMapRef = &corev1.LocalObjectReference{Name: desiredCM.Name} - } - } - - // Final status sync. - var oldPhase string - if postgresCluster.Status.Phase != nil { - oldPhase = *postgresCluster.Status.Phase - } - if err := syncStatus(ctx, c, rc.Metrics, postgresCluster, cnpgCluster); err != nil { - logger.Error(err, "Failed to sync status") - return ctrl.Result{}, err + } + health = s.health + }() + + if s.cluster.Status.Resources == nil || s.cluster.Status.Resources.SuperUserSecretRef == nil { + s.health.State = pgcConstants.Provisioning + s.health.Reason = reasonUserSecretPending + s.health.Message = msgSecretRefNotPublished + s.health.Phase = provisioningClusterPhase + s.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return s.health, nil } - var newPhase string - if postgresCluster.Status.Phase != nil { - newPhase = *postgresCluster.Status.Phase - } - rc.emitClusterPhaseTransition(postgresCluster, oldPhase, newPhase) - if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy { - rwPooler := &cnpgv1.Pooler{} - rwErr := c.Get(ctx, types.NamespacedName{ - Name: poolerResourceName(postgresCluster.Name, readWriteEndpoint), - Namespace: postgresCluster.Namespace, - }, rwPooler) - roPooler := &cnpgv1.Pooler{} - roErr := c.Get(ctx, types.NamespacedName{ - Name: poolerResourceName(postgresCluster.Name, readOnlyEndpoint), - Namespace: postgresCluster.Namespace, - }, roPooler) - if rwErr == nil && roErr == nil && arePoolersReady(rwPooler, roPooler) { - logger.Info("Poolers ready, syncing status") - poolerOldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions)) - copy(poolerOldConditions, postgresCluster.Status.Conditions) - _ = syncPoolerStatus(ctx, c, rc.Metrics, postgresCluster) - rc.emitPoolerReadyTransition(postgresCluster, poolerOldConditions) + + secret := &corev1.Secret{} + key := types.NamespacedName{Name: s.cluster.Status.Resources.SuperUserSecretRef.Name, Namespace: s.cluster.Namespace} + if err := s.client.Get(ctx, key, secret); err != nil { + if apierrors.IsNotFound(err) { + s.health.State = pgcConstants.Provisioning + s.health.Reason = reasonUserSecretPending + s.health.Message = msgSecretNotFoundYet + s.health.Phase = provisioningClusterPhase + s.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return s.health, nil } + s.health.State = pgcConstants.Failed + s.health.Reason = reasonUserSecretFailed + s.health.Message = fmt.Sprintf("Failed to fetch superuser secret: %v", err) + s.health.Phase = failedClusterPhase + s.health.Result = ctrl.Result{} + return s.health, err + } + + refKey := s.cluster.Status.Resources.SuperUserSecretRef.Key + if refKey == "" { + refKey = secretKeyPassword + } + if _, ok := secret.Data[refKey]; !ok { + s.health.State = pgcConstants.Failed + s.health.Reason = reasonSuperUserSecretFailed + s.health.Message = fmt.Sprintf(msgFmtSecretMissingKey, refKey) + s.health.Phase = failedClusterPhase + s.health.Result = ctrl.Result{} + return s.health, fmt.Errorf("secret missing key %s", refKey) + } + + s.health.State = pgcConstants.Ready + s.health.Reason = reasonSuperUserSecretReady + s.health.Message = msgSuperuserSecretReady + s.health.Phase = readyClusterPhase + s.health.Result = ctrl.Result{} + if !meta.IsStatusConditionTrue(s.cluster.Status.Conditions, string(secretsReady)) { + s.events.emitNormal(s.cluster, EventSecretReady, s.health.Message) + } + return s.health, nil +} + +func isIntermediateState(state pgcConstants.State) bool { + switch 
state { + case pgcConstants.Pending, + pgcConstants.Provisioning, + pgcConstants.Configuring: + return true + default: + return false } - logger.Info("Reconciliation complete") - return ctrl.Result{}, nil } // getMergedConfig overlays PostgresCluster spec on top of the class defaults. @@ -626,14 +1484,6 @@ func isPoolerReady(pooler *cnpgv1.Pooler) bool { return pooler.Status.Instances >= desired } -func poolerInstanceCount(p *cnpgv1.Pooler) (desired, scheduled int32) { - desired = 1 - if p.Spec.Instances != nil { - desired = *p.Spec.Instances - } - return desired, p.Status.Instances -} - // createOrUpdateConnectionPoolers creates RW and RO poolers if they don't exist. func createOrUpdateConnectionPoolers(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerMetricsEnabled bool) error { if err := createConnectionPooler(ctx, c, scheme, cluster, cfg, cnpgCluster, readWriteEndpoint, poolerMetricsEnabled); err != nil { @@ -727,90 +1577,6 @@ func deleteConnectionPoolers(ctx context.Context, c client.Client, cluster *ente return nil } -// syncPoolerStatus populates ConnectionPoolerStatus and the PoolerReady condition. -func syncPoolerStatus(ctx context.Context, c client.Client, metrics ports.Recorder, cluster *enterprisev4.PostgresCluster) error { - rwPooler := &cnpgv1.Pooler{} - if err := c.Get(ctx, types.NamespacedName{ - Name: poolerResourceName(cluster.Name, readWriteEndpoint), - Namespace: cluster.Namespace, - }, rwPooler); err != nil { - return err - } - - roPooler := &cnpgv1.Pooler{} - if err := c.Get(ctx, types.NamespacedName{ - Name: poolerResourceName(cluster.Name, readOnlyEndpoint), - Namespace: cluster.Namespace, - }, roPooler); err != nil { - return err - } - - cluster.Status.ConnectionPoolerStatus = &enterprisev4.ConnectionPoolerStatus{Enabled: true} - rwDesired, rwScheduled := poolerInstanceCount(rwPooler) - roDesired, roScheduled := poolerInstanceCount(roPooler) - - return setStatus(ctx, c, metrics, cluster, poolerReady, metav1.ConditionTrue, reasonAllInstancesReady, - fmt.Sprintf("%s: %d/%d, %s: %d/%d", readWriteEndpoint, rwScheduled, rwDesired, readOnlyEndpoint, roScheduled, roDesired), - readyClusterPhase) -} - -// syncStatus maps CNPG Cluster state to PostgresCluster status. 
-func syncStatus(ctx context.Context, c client.Client, metrics ports.Recorder, cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster) error { - cluster.Status.ProvisionerRef = &corev1.ObjectReference{ - APIVersion: "postgresql.cnpg.io/v1", - Kind: "Cluster", - Namespace: cnpgCluster.Namespace, - Name: cnpgCluster.Name, - UID: cnpgCluster.UID, - } - - var phase reconcileClusterPhases - var condStatus metav1.ConditionStatus - var reason conditionReasons - var message string - - switch cnpgCluster.Status.Phase { - case cnpgv1.PhaseHealthy: - phase, condStatus, reason, message = readyClusterPhase, metav1.ConditionTrue, reasonCNPGClusterHealthy, "Cluster is up and running" - case cnpgv1.PhaseFirstPrimary, cnpgv1.PhaseCreatingReplica, cnpgv1.PhaseWaitingForInstancesToBeActive: - phase, condStatus, reason = provisioningClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioning - message = fmt.Sprintf("CNPG cluster provisioning: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseSwitchover: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGSwitchover, "Cluster changing primary node" - case cnpgv1.PhaseFailOver: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGFailingOver, "Pod missing, need to change primary" - case cnpgv1.PhaseInplacePrimaryRestart, cnpgv1.PhaseInplaceDeletePrimaryRestart: - phase, condStatus, reason = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGRestarting - message = fmt.Sprintf("CNPG cluster restarting: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseUpgrade, cnpgv1.PhaseMajorUpgrade, cnpgv1.PhaseUpgradeDelayed, cnpgv1.PhaseOnlineUpgrading: - phase, condStatus, reason = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGUpgrading - message = fmt.Sprintf("CNPG cluster upgrading: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseApplyingConfiguration: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGApplyingConfig, "Configuration change is being applied" - case cnpgv1.PhaseReplicaClusterPromotion: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGPromoting, "Replica is being promoted to primary" - case cnpgv1.PhaseWaitingForUser: - phase, condStatus, reason, message = failedClusterPhase, metav1.ConditionFalse, reasonCNPGWaitingForUser, "Action from the user is required" - case cnpgv1.PhaseUnrecoverable: - phase, condStatus, reason, message = failedClusterPhase, metav1.ConditionFalse, reasonCNPGUnrecoverable, "Cluster failed, needs manual intervention" - case cnpgv1.PhaseCannotCreateClusterObjects: - phase, condStatus, reason, message = failedClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioningFailed, "Cluster resources cannot be created" - case cnpgv1.PhaseUnknownPlugin, cnpgv1.PhaseFailurePlugin: - phase, condStatus, reason = failedClusterPhase, metav1.ConditionFalse, reasonCNPGPluginError - message = fmt.Sprintf("CNPG plugin error: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseImageCatalogError, cnpgv1.PhaseArchitectureBinaryMissing: - phase, condStatus, reason = failedClusterPhase, metav1.ConditionFalse, reasonCNPGImageError - message = fmt.Sprintf("CNPG image error: %s", cnpgCluster.Status.Phase) - case "": - phase, condStatus, reason, message = pendingClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioning, "CNPG cluster is pending creation" - default: - phase, condStatus, reason = provisioningClusterPhase, metav1.ConditionFalse, 
reasonCNPGProvisioning - message = fmt.Sprintf("CNPG cluster phase: %s", cnpgCluster.Status.Phase) - } - - return setStatus(ctx, c, metrics, cluster, clusterReady, condStatus, reason, message, phase) -} - // setStatus sets the phase, condition and persists the status. // It skips the API write when the resulting status is identical to the current // state, avoiding unnecessary etcd churn and ResourceVersion bumps on stable clusters. @@ -831,7 +1597,9 @@ func setStatus(ctx context.Context, c client.Client, metrics ports.Recorder, clu return nil } - metrics.IncStatusTransition(ports.ControllerCluster, string(condType), string(status), string(reason)) + if metrics != nil { + metrics.IncStatusTransition(ports.ControllerCluster, string(condType), string(status), string(reason)) + } if err := c.Status().Update(ctx, cluster); err != nil { return fmt.Errorf("failed to update PostgresCluster status: %w", err) @@ -839,6 +1607,27 @@ func setStatus(ctx context.Context, c client.Client, metrics ports.Recorder, clu return nil } +func setStatusFromHealth(ctx context.Context, c client.Client, metrics ports.Recorder, cluster *enterprisev4.PostgresCluster, health componentHealth) error { + conditionStatus := metav1.ConditionFalse + if health.State == pgcConstants.Ready { + conditionStatus = metav1.ConditionTrue + } + return setStatus(ctx, c, metrics, cluster, health.Condition, conditionStatus, health.Reason, health.Message, health.Phase) +} + +func setPhaseStatus(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, phase reconcileClusterPhases) error { + before := cluster.Status.DeepCopy() + p := string(phase) + cluster.Status.Phase = &p + if equality.Semantic.DeepEqual(*before, cluster.Status) { + return nil + } + if err := c.Status().Update(ctx, cluster); err != nil { + return fmt.Errorf("failed to update PostgresCluster status phase: %w", err) + } + return nil +} + // generateConfigMap builds a ConfigMap with connection details for the PostgresCluster. 
func generateConfigMap(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster, secretName string) (*corev1.ConfigMap, error) { cmName := fmt.Sprintf("%s%s", cluster.Name, defaultConfigMapSuffix) @@ -847,12 +1636,12 @@ func generateConfigMap(ctx context.Context, c client.Client, scheme *runtime.Sch } data := map[string]string{ - "CLUSTER_RW_ENDPOINT": fmt.Sprintf("%s-rw.%s", cnpgCluster.Name, cnpgCluster.Namespace), - "CLUSTER_RO_ENDPOINT": fmt.Sprintf("%s-ro.%s", cnpgCluster.Name, cnpgCluster.Namespace), - "CLUSTER_R_ENDPOINT": fmt.Sprintf("%s-r.%s", cnpgCluster.Name, cnpgCluster.Namespace), - "DEFAULT_CLUSTER_PORT": defaultPort, - "SUPER_USER_NAME": superUsername, - "SUPER_USER_SECRET_REF": secretName, + configKeyClusterRWEndpoint: fmt.Sprintf("%s-rw.%s", cnpgCluster.Name, cnpgCluster.Namespace), + configKeyClusterROEndpoint: fmt.Sprintf("%s-ro.%s", cnpgCluster.Name, cnpgCluster.Namespace), + configKeyClusterREndpoint: fmt.Sprintf("%s-r.%s", cnpgCluster.Name, cnpgCluster.Namespace), + configKeyDefaultClusterPort: defaultPort, + configKeySuperUserName: superUsername, + configKeySuperUserSecretRef: secretName, } rwExists, err := poolerExists(ctx, c, cluster, readWriteEndpoint) if err != nil { @@ -863,8 +1652,8 @@ func generateConfigMap(ctx context.Context, c client.Client, scheme *runtime.Sch return nil, fmt.Errorf("failed to check RO pooler existence: %w", err) } if rwExists && roExists { - data["CLUSTER_POOLER_RW_ENDPOINT"] = fmt.Sprintf("%s.%s", poolerResourceName(cnpgCluster.Name, readWriteEndpoint), cnpgCluster.Namespace) - data["CLUSTER_POOLER_RO_ENDPOINT"] = fmt.Sprintf("%s.%s", poolerResourceName(cnpgCluster.Name, readOnlyEndpoint), cnpgCluster.Namespace) + data[configKeyPoolerRWEndpoint] = fmt.Sprintf("%s.%s", poolerResourceName(cnpgCluster.Name, readWriteEndpoint), cnpgCluster.Namespace) + data[configKeyPoolerROEndpoint] = fmt.Sprintf("%s.%s", poolerResourceName(cnpgCluster.Name, readOnlyEndpoint), cnpgCluster.Namespace) } cm := &corev1.ConfigMap{ @@ -1056,15 +1845,12 @@ func removeOwnerRef(scheme *runtime.Scheme, owner, obj client.Object) (bool, err // patchObject patches obj from original; treats NotFound as a no-op. 
func patchObject(ctx context.Context, c client.Client, original, obj client.Object, kind objectKind) error { - logger := log.FromContext(ctx) if err := c.Patch(ctx, obj, client.MergeFrom(original)); err != nil { if apierrors.IsNotFound(err) { - logger.Info("Object not found, skipping patch", "kind", kind, "name", obj.GetName()) return nil } return fmt.Errorf("patching %s: %w", kind, err) } - logger.Info("Object patched", "kind", kind, "name", obj.GetName()) return nil } diff --git a/pkg/postgresql/cluster/core/cluster_unit_test.go b/pkg/postgresql/cluster/core/cluster_unit_test.go index 72b30205b..85eeba7e2 100644 --- a/pkg/postgresql/cluster/core/cluster_unit_test.go +++ b/pkg/postgresql/cluster/core/cluster_unit_test.go @@ -6,18 +6,67 @@ import ( cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" enterprisev4 "github.com/splunk/splunk-operator/api/v4" + pgcConstants "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core/types/constants" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/utils/ptr" client "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) +type configMapNotFoundClient struct { + client.Client +} + +type noopEventEmitter struct{} + +func (noopEventEmitter) emitNormal(_ client.Object, _, _ string) {} +func (noopEventEmitter) emitWarning(_ client.Object, _, _ string) {} +func (noopEventEmitter) emitPoolerReadyTransition(_ client.Object, _ []metav1.Condition) {} +func (noopEventEmitter) emitPoolerCreationTransition(_ client.Object, _ []metav1.Condition) { +} + +type captureEventEmitter struct { + normals []string + warnings []string +} + +func (c *captureEventEmitter) emitNormal(_ client.Object, reason, message string) { + c.normals = append(c.normals, reason+":"+message) +} + +func (c *captureEventEmitter) emitWarning(_ client.Object, reason, message string) { + c.warnings = append(c.warnings, reason+":"+message) +} + +func (c *captureEventEmitter) emitPoolerReadyTransition(_ client.Object, conditions []metav1.Condition) { + if !meta.IsStatusConditionTrue(conditions, string(poolerReady)) { + c.normals = append(c.normals, EventPoolerReady+":Connection poolers are ready") + } +} + +func (c *captureEventEmitter) emitPoolerCreationTransition(_ client.Object, conditions []metav1.Condition) { + cond := meta.FindStatusCondition(conditions, string(poolerReady)) + if cond != nil && cond.Status == metav1.ConditionFalse && cond.Reason == string(reasonPoolerCreating) { + return + } + c.normals = append(c.normals, EventPoolerCreationStarted+":Connection poolers created, waiting for readiness") +} + +func (c configMapNotFoundClient) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + if _, ok := obj.(*corev1.ConfigMap); ok { + return apierrors.NewNotFound(schema.GroupResource{Resource: "configmaps"}, key.Name) + } + return c.Client.Get(ctx, key, obj, opts...) 
+} + func TestPoolerResourceName(t *testing.T) { tests := []struct { name string @@ -94,6 +143,46 @@ func TestIsPoolerReady(t *testing.T) { } } +func TestPoolerInstanceCountManual(t *testing.T) { + tests := []struct { + name string + pooler *cnpgv1.Pooler + expectedDesired int32 + expectedScheduled int32 + }{ + { + name: "nil instances defaults desired to 1", + pooler: &cnpgv1.Pooler{ + Status: cnpgv1.PoolerStatus{Instances: 3}, + }, + expectedDesired: 1, + expectedScheduled: 3, + }, + { + name: "explicit instances uses spec value", + pooler: &cnpgv1.Pooler{ + Spec: cnpgv1.PoolerSpec{Instances: ptr.To(int32(5))}, + Status: cnpgv1.PoolerStatus{Instances: 2}, + }, + expectedDesired: 5, + expectedScheduled: 2, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + desired := int32(1) + if tt.pooler.Spec.Instances != nil { + desired = *tt.pooler.Spec.Instances + } + scheduled := tt.pooler.Status.Instances + + assert.Equal(t, tt.expectedDesired, desired) + assert.Equal(t, tt.expectedScheduled, scheduled) + }) + } +} + func TestNormalizeCNPGClusterSpec(t *testing.T) { tests := []struct { name string @@ -1044,42 +1133,6 @@ func TestGenerateConfigMap(t *testing.T) { }) } -func TestPoolerInstanceCount(t *testing.T) { - tests := []struct { - name string - pooler *cnpgv1.Pooler - expectedDesired int32 - expectedScheduled int32 - }{ - { - name: "nil instances defaults desired to 1", - pooler: &cnpgv1.Pooler{ - Status: cnpgv1.PoolerStatus{Instances: 3}, - }, - expectedDesired: 1, - expectedScheduled: 3, - }, - { - name: "explicit instances returns spec value", - pooler: &cnpgv1.Pooler{ - Spec: cnpgv1.PoolerSpec{Instances: ptr.To(int32(5))}, - Status: cnpgv1.PoolerStatus{Instances: 2}, - }, - expectedDesired: 5, - expectedScheduled: 2, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - desired, scheduled := poolerInstanceCount(tt.pooler) - - assert.Equal(t, tt.expectedDesired, desired) - assert.Equal(t, tt.expectedScheduled, scheduled) - }) - } -} - func TestGeneratePassword(t *testing.T) { pw, err := generatePassword() @@ -1206,3 +1259,774 @@ func TestCreateOrUpdateConnectionPoolers(t *testing.T) { assert.Equal(t, poolerMetricsPortString, ro.Spec.Template.ObjectMeta.Annotations[prometheusPortAnnotation]) }) } + +func TestComponentStateTriggerConditions(t *testing.T) { + t.Parallel() + + ctx := t.Context() + scheme := runtime.NewScheme() + require.NoError(t, corev1.AddToScheme(scheme)) + require.NoError(t, enterprisev4.AddToScheme(scheme)) + require.NoError(t, cnpgv1.AddToScheme(scheme)) + + exampleClusterClass := &enterprisev4.PostgresClusterClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-class", + Namespace: "default", + }, + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: ptr.To(true), + }, + }, + Status: enterprisev4.PostgresClusterClassStatus{ + Phase: ptr.To(string(enterprisev4.PhaseReady)), + }, + } + + exampleCm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-config", + Namespace: "default", + }, + Data: map[string]string{ + "CLUSTER_RW_ENDPOINT": "pg1-rw.default", + "CLUSTER_RO_ENDPOINT": "pg1-ro.default", + "DEFAULT_CLUSTER_PORT": "5432", + "SUPER_USER_SECRET_REF": "pg1-secret", + }, + } + examplePgCluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1", + Namespace: "default", + }, + Status: enterprisev4.PostgresClusterStatus{ + Resources: &enterprisev4.PostgresClusterResources{ + ConfigMapRef: 
&corev1.LocalObjectReference{Name: "pg1-config"}, + SuperUserSecretRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "pg1-secret"}, + Key: "password", + }, + }, + }, + } + exampleSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-secret", + Namespace: "default", + }, + Data: map[string][]byte{ + "password": []byte("s3cr3t"), + }, + } + + instances := int32(1) + version := "16" + storageSize := resource.MustParse("10Gi") + mergedConfig := &MergedConfig{ + Spec: &enterprisev4.PostgresClusterSpec{ + Instances: &instances, + PostgresVersion: &version, + Storage: &storageSize, + Resources: &corev1.ResourceRequirements{}, + PostgreSQLConfig: map[string]string{}, + PgHBA: []string{}, + }, + } + + makeReadyProvisioner := func(cluster *enterprisev4.PostgresCluster) *clusterModel { + cnpg := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name, + Namespace: cluster.Namespace, + }, + Spec: buildCNPGClusterSpec(mergedConfig, "pg1-secret", false), + Status: cnpgv1.ClusterStatus{ + Phase: cnpgv1.PhaseHealthy, + }, + } + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cnpg).Build() + return newClusterModel(c, scheme, noopEventEmitter{}, nil, cluster, exampleClusterClass, mergedConfig, "pg1-secret") + } + + makeRuntimeView := func(healthy bool) clusterRuntimeView { + if !healthy { + return clusterRuntimeViewAdapter{model: &clusterModel{}} + } + return clusterRuntimeViewAdapter{model: &clusterModel{ + cnpgCluster: &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}, + }, + }} + } + + // TODO: as soon as coupling is addressed, remove this monster of a test. + combinations := []struct { + name string + components []component + conditions []conditionTypes + requeue []bool + expectAll bool + message string + }{ + { + name: "Provisioner ready, pooler blocked by prerequisites", + components: func() []component { + cluster := examplePgCluster.DeepCopy() + provisioner := makeReadyProvisioner(cluster) + pooler := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).Build(), + scheme, + noopEventEmitter{}, + nil, + cluster, + exampleClusterClass, + mergedConfig, + nil, + true, + true, + ) + return []component{provisioner, pooler} + }(), + conditions: []conditionTypes{clusterReady, poolerReady}, + requeue: []bool{false, true}, + expectAll: false, + message: "Provisioner ready but pooler gate is blocked until CNPG is healthy", + }, + { + name: "Provisioner ready, pooler ready, configMap pending from NotFound", + components: func() []component { + cluster := examplePgCluster.DeepCopy() + provisioner := makeReadyProvisioner(cluster) + pooler := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).Build(), + scheme, + noopEventEmitter{}, + nil, + cluster, + exampleClusterClass, + mergedConfig, + nil, + false, + false, + ) + configMap := newConfigMapModel( + configMapNotFoundClient{ + Client: fake.NewClientBuilder(). + WithScheme(scheme). 
+ Build(), + }, + scheme, + noopEventEmitter{}, + nil, + makeRuntimeView(true), + cluster, + "pg1-secret", + ) + return []component{provisioner, pooler, configMap} + }(), + conditions: []conditionTypes{clusterReady, poolerReady, configMapsReady}, + requeue: []bool{false, false, true}, + expectAll: false, + message: "Provisioner and pooler ready are not enough when ConfigMap check returns NotFound/pending", + }, + { + name: "Flow successful, all components ready", + components: func() []component { + cluster := examplePgCluster.DeepCopy() + provisioner := makeReadyProvisioner(cluster) + pooler := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).Build(), + scheme, + noopEventEmitter{}, + nil, + cluster, + exampleClusterClass, + mergedConfig, + nil, + false, + false, + ) + configMap := newConfigMapModel( + fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(exampleCm). + Build(), + scheme, + noopEventEmitter{}, + nil, + makeRuntimeView(true), + cluster, + "pg1-secret", + ) + secret := newSecretModel( + fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(exampleSecret). + Build(), + scheme, + noopEventEmitter{}, + nil, + cluster, + "pg1-secret", + ) + return []component{provisioner, pooler, configMap, secret} + }(), + conditions: []conditionTypes{clusterReady, poolerReady, configMapsReady, secretsReady}, + requeue: []bool{false, false, false, false}, + expectAll: true, + message: "", + }, + } + + for _, tt := range combinations { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + state := pgcConstants.Empty + for i, check := range tt.components { + gate, gateErr := check.EvaluatePrerequisites(ctx) + require.NoError(t, gateErr) + if !gate.Allowed { + info := gate.Health + state = info.State + assert.Equal(t, tt.conditions[i], info.Condition) + assert.Equal(t, tt.requeue[i], info.Result.RequeueAfter > 0) + continue + } + + require.NoError(t, check.Actuate(ctx)) + info, err := check.Converge(ctx) + require.NoError(t, err) + state = info.State + assert.Equal(t, tt.conditions[i], info.Condition) + assert.Equal(t, tt.requeue[i], info.Result.RequeueAfter > 0) + } + assert.Equal(t, tt.expectAll, state&pgcConstants.Ready == pgcConstants.Ready, + tt.message) + }) + } +} + +func TestSyncManagedRolesStatusFromCNPG(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + specRoles []enterprisev4.ManagedRole + cnpgStatus cnpgv1.ManagedRoles + reconciled []string + pending []string + failed map[string]string + }{ + { + name: "marks unreconciled desired role as pending", + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + cnpgStatus: cnpgv1.ManagedRoles{}, + reconciled: nil, + pending: []string{"app_user"}, + failed: nil, + }, + { + name: "maps reconciled and pending roles from CNPG status", + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + {Name: "app_rw", Exists: true}, + }, + cnpgStatus: cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusReconciled: {"app_user"}, + cnpgv1.RoleStatusPendingReconciliation: {"app_rw"}, + }, + }, + reconciled: []string{"app_user"}, + pending: []string{"app_rw"}, + failed: nil, + }, + { + name: "maps cannot reconcile errors as failed", + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + cnpgStatus: cnpgv1.ManagedRoles{ + CannotReconcile: map[string][]string{ + "app_user": {"reserved role"}, + }, + }, + reconciled: nil, + pending: nil, + failed: map[string]string{ + "app_user": "reserved role", + }, + }, + 
} + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ManagedRoles: tt.specRoles, + }, + } + cnpgCluster := &cnpgv1.Cluster{ + Status: cnpgv1.ClusterStatus{ + ManagedRolesStatus: tt.cnpgStatus, + }, + } + + syncManagedRolesStatusFromCNPG(cluster, cnpgCluster) + + require.NotNil(t, cluster.Status.ManagedRolesStatus) + assert.Equal(t, tt.reconciled, cluster.Status.ManagedRolesStatus.Reconciled) + assert.Equal(t, tt.pending, cluster.Status.ManagedRolesStatus.Pending) + assert.Equal(t, tt.failed, cluster.Status.ManagedRolesStatus.Failed) + }) + } +} + +func TestManagedRolesModelConverge(t *testing.T) { + t.Parallel() + + makeRuntimeView := func(phase string, managedRoles cnpgv1.ManagedRoles) clusterRuntimeView { + return clusterRuntimeViewAdapter{model: &clusterModel{ + cnpgCluster: &cnpgv1.Cluster{ + Status: cnpgv1.ClusterStatus{ + Phase: phase, + ManagedRolesStatus: managedRoles, + }, + }, + }} + } + + tests := []struct { + name string + runtimeView clusterRuntimeView + specRoles []enterprisev4.ManagedRole + expectedState pgcConstants.State + expectedReason conditionReasons + expectErr bool + expectStatusPublished bool + expectPending []string + expectFailed map[string]string + }{ + { + name: "returns pending when runtime is not healthy", + runtimeView: makeRuntimeView(cnpgv1.PhaseFirstPrimary, cnpgv1.ManagedRoles{}), + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + expectedState: pgcConstants.Pending, + expectedReason: reasonManagedRolesPending, + expectErr: false, + expectStatusPublished: false, + }, + { + name: "returns pending when role is still pending reconciliation", + runtimeView: makeRuntimeView(cnpgv1.PhaseHealthy, cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusPendingReconciliation: {"app_user"}, + }, + }), + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + expectedState: pgcConstants.Pending, + expectedReason: reasonManagedRolesPending, + expectErr: false, + expectStatusPublished: true, + expectPending: []string{"app_user"}, + }, + { + name: "returns failed when role cannot reconcile", + runtimeView: makeRuntimeView(cnpgv1.PhaseHealthy, cnpgv1.ManagedRoles{ + CannotReconcile: map[string][]string{ + "app_user": {"reserved role"}, + }, + }), + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + expectedState: pgcConstants.Failed, + expectedReason: reasonManagedRolesFailed, + expectErr: true, + expectStatusPublished: true, + expectFailed: map[string]string{ + "app_user": "reserved role", + }, + }, + { + name: "returns ready when all desired roles are reconciled", + runtimeView: makeRuntimeView(cnpgv1.PhaseHealthy, cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusReconciled: {"app_user", "app_user_rw"}, + }, + }), + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + {Name: "app_user_rw", Exists: true}, + }, + expectedState: pgcConstants.Ready, + expectedReason: reasonManagedRolesReady, + expectErr: false, + expectStatusPublished: true, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ManagedRoles: tt.specRoles, + }, + } + model := newManagedRolesModel( + fake.NewClientBuilder().Build(), + nil, + noopEventEmitter{}, + 
nil, + tt.runtimeView, + cluster, + "pg1-secret", + ) + + health, err := model.Converge(context.Background()) + if tt.expectErr { + require.Error(t, err) + } else { + require.NoError(t, err) + } + + assert.Equal(t, managedRolesReady, health.Condition) + assert.Equal(t, tt.expectedState, health.State) + assert.Equal(t, tt.expectedReason, health.Reason) + if tt.expectStatusPublished { + require.NotNil(t, cluster.Status.ManagedRolesStatus) + assert.Equal(t, tt.expectPending, cluster.Status.ManagedRolesStatus.Pending) + assert.Equal(t, tt.expectFailed, cluster.Status.ManagedRolesStatus.Failed) + } else { + assert.Nil(t, cluster.Status.ManagedRolesStatus) + } + }) + } +} + +func TestManagedRolesRuntimeGateHealthMatchesConverge(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ManagedRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + }, + } + model := newManagedRolesModel( + fake.NewClientBuilder().Build(), + nil, + noopEventEmitter{}, + nil, + clusterRuntimeViewAdapter{model: &clusterModel{ + cnpgCluster: &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseFirstPrimary}}, + }}, + cluster, + "pg1-secret", + ) + + gate, err := model.EvaluatePrerequisites(context.Background()) + require.NoError(t, err) + require.False(t, gate.Allowed) + + health, err := model.Converge(context.Background()) + require.NoError(t, err) + assert.Equal(t, gate.Health, health) +} + +func TestPoolerModelConvergeSetsConnectionPoolerStatus(t *testing.T) { + t.Parallel() + + scheme := runtime.NewScheme() + require.NoError(t, enterprisev4.AddToScheme(scheme)) + require.NoError(t, cnpgv1.AddToScheme(scheme)) + require.NoError(t, corev1.AddToScheme(scheme)) + + t.Run("does not set enabled true while pooler is pending", func(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + } + clusterClass := &enterprisev4.PostgresClusterClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-class", + Namespace: "default", + }, + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: ptr.To(true), + }, + }, + } + model := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).Build(), + scheme, + noopEventEmitter{}, + nil, + cluster, + clusterClass, + &MergedConfig{}, + nil, + true, + true, + ) + + health, err := model.Converge(context.Background()) + require.NoError(t, err) + assert.Nil(t, cluster.Status.ConnectionPoolerStatus) + assert.Equal(t, pgcConstants.Pending, health.State) + }) + + t.Run("sets enabled true when pooler converges ready", func(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + } + clusterClass := &enterprisev4.PostgresClusterClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-class", + Namespace: "default", + }, + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: ptr.To(true), + }, + }, + } + rwPooler := &cnpgv1.Pooler{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolerResourceName(cluster.Name, readWriteEndpoint), + Namespace: cluster.Namespace, + }, + Status: cnpgv1.PoolerStatus{Instances: 1}, + } + roPooler := &cnpgv1.Pooler{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolerResourceName(cluster.Name, readOnlyEndpoint), + Namespace: cluster.Namespace, + }, + Status: 
cnpgv1.PoolerStatus{Instances: 1}, + } + model := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).WithObjects(rwPooler, roPooler).Build(), + scheme, + noopEventEmitter{}, + nil, + cluster, + clusterClass, + &MergedConfig{}, + &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}, + true, + true, + ) + + health, err := model.Converge(context.Background()) + require.NoError(t, err) + assert.Equal(t, &enterprisev4.ConnectionPoolerStatus{Enabled: true}, cluster.Status.ConnectionPoolerStatus) + assert.Equal(t, pgcConstants.Ready, health.State) + }) + + t.Run("sets status nil when pooler disabled", func(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Status: enterprisev4.PostgresClusterStatus{ + ConnectionPoolerStatus: &enterprisev4.ConnectionPoolerStatus{Enabled: true}, + }, + } + clusterClass := &enterprisev4.PostgresClusterClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-class", + Namespace: "default", + }, + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: ptr.To(true), + }, + }, + } + model := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).Build(), + scheme, + noopEventEmitter{}, + nil, + cluster, + clusterClass, + &MergedConfig{}, + nil, + false, + false, + ) + + require.NoError(t, model.Actuate(context.Background())) + health, err := model.Converge(context.Background()) + require.NoError(t, err) + assert.Nil(t, cluster.Status.ConnectionPoolerStatus) + assert.Equal(t, pgcConstants.Ready, health.State) + }) +} + +func TestPoolerConvergeEmitsReadyEventOnTransition(t *testing.T) { + t.Parallel() + + scheme := runtime.NewScheme() + require.NoError(t, enterprisev4.AddToScheme(scheme)) + require.NoError(t, cnpgv1.AddToScheme(scheme)) + require.NoError(t, corev1.AddToScheme(scheme)) + + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + } + clusterClass := &enterprisev4.PostgresClusterClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-class", + Namespace: "default", + }, + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: ptr.To(true), + }, + }, + } + events := &captureEventEmitter{} + rwPooler := &cnpgv1.Pooler{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolerResourceName(cluster.Name, readWriteEndpoint), + Namespace: cluster.Namespace, + }, + Status: cnpgv1.PoolerStatus{Instances: 1}, + } + roPooler := &cnpgv1.Pooler{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolerResourceName(cluster.Name, readOnlyEndpoint), + Namespace: cluster.Namespace, + }, + Status: cnpgv1.PoolerStatus{Instances: 1}, + } + model := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).WithObjects(rwPooler, roPooler).Build(), + scheme, + events, + nil, + cluster, + clusterClass, + &MergedConfig{}, + &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}, + true, + true, + ) + + _, err := model.Converge(context.Background()) + require.NoError(t, err) + require.NotEmpty(t, events.normals) + assert.Contains(t, events.normals[0], EventPoolerReady) + + // No re-emission when condition already True. 
+ cluster.Status.Conditions = []metav1.Condition{{ + Type: string(poolerReady), + Status: metav1.ConditionTrue, + }} + events.normals = nil + _, err = model.Converge(context.Background()) + require.NoError(t, err) + assert.Empty(t, events.normals) +} + +func TestManagedRolesConvergeDoesNotEmitFailureForPending(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ManagedRoles: []enterprisev4.ManagedRole{{Name: "app_user", Exists: true}}, + }, + } + events := &captureEventEmitter{} + model := newManagedRolesModel( + fake.NewClientBuilder().Build(), + nil, + events, + nil, + clusterRuntimeViewAdapter{model: &clusterModel{ + cnpgCluster: &cnpgv1.Cluster{ + Status: cnpgv1.ClusterStatus{ + Phase: cnpgv1.PhaseHealthy, + ManagedRolesStatus: cnpgv1.ManagedRoles{}, + }, + }, + }}, + cluster, + "pg1-secret", + ) + + _, err := model.Converge(context.Background()) + require.NoError(t, err) + assert.Empty(t, events.warnings) +} + +func TestManagedRolesConvergeEmitsReadyEventOnTransition(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ManagedRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + }, + } + events := &captureEventEmitter{} + model := newManagedRolesModel( + fake.NewClientBuilder().Build(), + nil, + events, + nil, + clusterRuntimeViewAdapter{model: &clusterModel{ + cnpgCluster: &cnpgv1.Cluster{ + Status: cnpgv1.ClusterStatus{ + Phase: cnpgv1.PhaseHealthy, + ManagedRolesStatus: cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusReconciled: {"app_user"}, + }, + }, + }, + }, + }}, + cluster, + "pg1-secret", + ) + + _, err := model.Converge(context.Background()) + require.NoError(t, err) + require.NotEmpty(t, events.normals) + assert.Contains(t, events.normals[0], EventManagedRolesReady) + + // No re-emission when condition already True. + cluster.Status.Conditions = []metav1.Condition{{ + Type: string(managedRolesReady), + Status: metav1.ConditionTrue, + }} + events.normals = nil + _, err = model.Converge(context.Background()) + require.NoError(t, err) + assert.Empty(t, events.normals) +} diff --git a/pkg/postgresql/cluster/core/events.go b/pkg/postgresql/cluster/core/events.go index afcfd768e..551ce9147 100644 --- a/pkg/postgresql/cluster/core/events.go +++ b/pkg/postgresql/cluster/core/events.go @@ -25,6 +25,7 @@ const ( EventClusterCreateFailed = "ClusterCreateFailed" EventClusterUpdateFailed = "ClusterUpdateFailed" EventManagedRolesFailed = "ManagedRolesFailed" + EventManagedRolesReady = "ManagedRolesReady" EventPoolerReconcileFailed = "PoolerReconcileFailed" EventConfigMapReconcileFailed = "ConfigMapReconcileFailed" EventClusterDegraded = "ClusterDegraded" @@ -60,3 +61,13 @@ func (rc *ReconcileContext) emitPoolerReadyTransition(obj client.Object, conditi rc.emitNormal(obj, EventPoolerReady, "Connection poolers are ready") } } + +// emitPoolerCreationTransition emits PoolerCreationStarted only when the +// pooler condition is not already in the creating state. 
+func (rc *ReconcileContext) emitPoolerCreationTransition(obj client.Object, conditions []metav1.Condition) { + cond := meta.FindStatusCondition(conditions, string(poolerReady)) + if cond != nil && cond.Status == metav1.ConditionFalse && cond.Reason == string(reasonPoolerCreating) { + return + } + rc.emitNormal(obj, EventPoolerCreationStarted, "Connection poolers created, waiting for readiness") +} diff --git a/pkg/postgresql/cluster/core/types.go b/pkg/postgresql/cluster/core/types.go index 7684e6df0..b0f0ed0d7 100644 --- a/pkg/postgresql/cluster/core/types.go +++ b/pkg/postgresql/cluster/core/types.go @@ -45,6 +45,7 @@ type MergedConfig struct { type reconcileClusterPhases string type conditionTypes string type conditionReasons string +type statusMessage = string type objectKind string const ( @@ -53,9 +54,17 @@ const ( readOnlyEndpoint string = "ro" readWriteEndpoint string = "rw" - defaultDatabaseName string = "postgres" - superUsername string = "postgres" - defaultPort string = "5432" + defaultDatabaseName string = "postgres" + superUsername string = "postgres" + defaultPort string = "5432" + configKeyClusterRWEndpoint string = "CLUSTER_RW_ENDPOINT" + configKeyClusterROEndpoint string = "CLUSTER_RO_ENDPOINT" + configKeyClusterREndpoint string = "CLUSTER_R_ENDPOINT" + configKeyDefaultClusterPort string = "DEFAULT_CLUSTER_PORT" + configKeySuperUserName string = "SUPER_USER_NAME" + configKeySuperUserSecretRef string = "SUPER_USER_SECRET_REF" + configKeyPoolerRWEndpoint string = "CLUSTER_POOLER_RW_ENDPOINT" + configKeyPoolerROEndpoint string = "CLUSTER_POOLER_RO_ENDPOINT" secretKeyPassword string = "password" defaultSecretSuffix string = "-secret" @@ -77,21 +86,33 @@ const ( failedClusterPhase reconcileClusterPhases = "Failed" // condition types - clusterReady conditionTypes = "ClusterReady" - poolerReady conditionTypes = "PoolerReady" - - // condition reasons — clusterReady - reasonClusterClassNotFound conditionReasons = "ClusterClassNotFound" - reasonManagedRolesFailed conditionReasons = "ManagedRolesReconciliationFailed" - reasonClusterBuildFailed conditionReasons = "ClusterBuildFailed" - reasonClusterBuildSucceeded conditionReasons = "ClusterBuildSucceeded" - reasonClusterGetFailed conditionReasons = "ClusterGetFailed" - reasonClusterPatchFailed conditionReasons = "ClusterPatchFailed" - reasonInvalidConfiguration conditionReasons = "InvalidConfiguration" - reasonConfigMapFailed conditionReasons = "ConfigMapReconciliationFailed" + clusterReady conditionTypes = "ClusterReady" + poolerReady conditionTypes = "PoolerReady" + managedRolesReady conditionTypes = "ManagedRolesReady" + secretsReady conditionTypes = "SecretsReady" + configMapsReady conditionTypes = "ConfigMapsReady" + + // condition reasons — cluster/provisioner + reasonClusterClassNotFound conditionReasons = "ClusterClassNotFound" + reasonInvalidConfiguration conditionReasons = "InvalidConfiguration" + + // condition reasons — managedRolesReady + reasonManagedRolesReady conditionReasons = "ManagedRolesReconciled" + reasonManagedRolesPending conditionReasons = "ManagedRolesPending" + reasonManagedRolesFailed conditionReasons = "ManagedRolesReconciliationFailed" + + // condition reasons — configMapsReady + reasonConfigMapReady conditionReasons = "ConfigMapReconciled" + reasonConfigMapFailed conditionReasons = "ConfigMapReconciliationFailed" + + // condition reasons — secretsReady + reasonUserSecretPending conditionReasons = "UserSecretPending" reasonUserSecretFailed conditionReasons = "UserSecretReconciliationFailed" + 
reasonSuperUserSecretReady conditionReasons = "SuperUserSecretReady" reasonSuperUserSecretFailed conditionReasons = "SuperUserSecretFailed" - reasonClusterDeleteFailed conditionReasons = "ClusterDeleteFailed" + + // condition reasons — lifecycle/finalizer + reasonClusterDeleteFailed conditionReasons = "ClusterDeleteFailed" // condition reasons — poolerReady reasonPoolerReconciliationFailed conditionReasons = "PoolerReconciliationFailed" @@ -114,4 +135,38 @@ const ( reasonCNPGProvisioningFailed conditionReasons = "CNPGProvisioningFailed" reasonCNPGPluginError conditionReasons = "CNPGPluginError" reasonCNPGImageError conditionReasons = "CNPGImageError" + + // status messages — provisioner health check + msgProvisionerHealthy statusMessage = "Provisioner cluster is healthy" + msgCNPGPendingCreation statusMessage = "CNPG cluster is pending creation" + msgFmtCNPGProvisioning statusMessage = "CNPG cluster provisioning: %s" + msgCNPGSwitchover statusMessage = "Cluster changing primary node" + msgCNPGFailingOver statusMessage = "Pod missing, need to change primary" + msgFmtCNPGRestarting statusMessage = "CNPG cluster restarting: %s" + msgFmtCNPGUpgrading statusMessage = "CNPG cluster upgrading: %s" + msgCNPGApplyingConfiguration statusMessage = "Configuration change is being applied" + msgCNPGPromoting statusMessage = "Replica is being promoted to primary" + msgCNPGWaitingForUser statusMessage = "Action from the user is required" + msgCNPGUnrecoverable statusMessage = "Cluster failed, needs manual intervention" + msgCNPGCannotCreateObjects statusMessage = "Cluster resources cannot be created" + msgFmtCNPGPluginError statusMessage = "CNPG plugin error: %s" + msgFmtCNPGImageError statusMessage = "CNPG image error: %s" + msgFmtCNPGClusterPhase statusMessage = "CNPG cluster phase: %s" + + // status messages — aggregate and component readiness checks + msgPoolerDisabled statusMessage = "Connection pooler disabled" + msgPoolerConfigMissing statusMessage = "Connection pooler enabled but configuration is missing" + msgPoolersProvisioning statusMessage = "Connection poolers are being provisioned" + msgWaitRWPoolerObject statusMessage = "Waiting for RW pooler object" + msgWaitROPoolerObject statusMessage = "Waiting for RO pooler object" + msgPoolersNotReady statusMessage = "Connection poolers are not ready yet" + msgPoolersReady statusMessage = "Connection poolers are ready" + msgConfigMapRefNotPublished statusMessage = "ConfigMap reference not published yet" + msgConfigMapNotFoundYet statusMessage = "ConfigMap not found yet" + msgFmtConfigMapMissingRequiredKey statusMessage = "ConfigMap missing required key %q" + msgAccessConfigMapReady statusMessage = "Access ConfigMap is ready" + msgSecretRefNotPublished statusMessage = "Superuser secret reference not published yet" + msgSecretNotFoundYet statusMessage = "Superuser secret not found yet" + msgFmtSecretMissingKey statusMessage = "Superuser secret missing key %q" + msgSuperuserSecretReady statusMessage = "Superuser secret is ready" ) diff --git a/pkg/postgresql/cluster/core/types/constants/components.go b/pkg/postgresql/cluster/core/types/constants/components.go new file mode 100644 index 000000000..f6dcdfb7b --- /dev/null +++ b/pkg/postgresql/cluster/core/types/constants/components.go @@ -0,0 +1,9 @@ +package pgcConstants + +const ( + ComponentManagedRoles = "managedRoles" + ComponentProvisioner = "provisioner" + ComponentPooler = "pooler" + ComponentConfigMap = "configMap" + ComponentSecret = "secret" +) diff --git 
a/pkg/postgresql/cluster/core/types/constants/state.go b/pkg/postgresql/cluster/core/types/constants/state.go new file mode 100644 index 000000000..7f4da47e9 --- /dev/null +++ b/pkg/postgresql/cluster/core/types/constants/state.go @@ -0,0 +1,24 @@ +package pgcConstants + +type State uint64 + +const ( + Empty State = 0 + Ready State = 1 << iota + Pending + Provisioning + Configuring + Failed +) + +func (s State) Contains(state State) bool { + return s&state == state +} + +func (s State) Add(state State) State { + return s | state +} + +func (s State) Remove(state State) State { + return s &^ state +} From 95fcb2deaa7b09258eb3a7ce5dcc7ca8eec84e77 Mon Sep 17 00:00:00 2001 From: Jakub Koterba Date: Mon, 20 Apr 2026 15:50:56 +0200 Subject: [PATCH 2/2] review changes --- .../postgrescluster_controller_test.go | 152 ++++++- pkg/postgresql/cluster/core/cluster.go | 386 +++++++++++------- .../cluster/core/cluster_unit_test.go | 277 ++++++++++++- pkg/postgresql/cluster/core/events.go | 1 + pkg/postgresql/cluster/core/types.go | 3 + 5 files changed, 652 insertions(+), 167 deletions(-) diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go index b7becdff5..9aeed63eb 100644 --- a/internal/controller/postgrescluster_controller_test.go +++ b/internal/controller/postgrescluster_controller_test.go @@ -54,18 +54,24 @@ import ( * PC-09 ignores no-op updates */ -func containsEvents(events *[]string, recorder *record.FakeRecorder, eventType string, event string) bool { +func CollectEvents(events *[]string, recorder *record.FakeRecorder) { for { select { case e := <-recorder.Events: *events = append(*events, e) - if strings.Contains(e, eventType) && strings.Contains(e, event) { - return true - } default: - return false + return + } + } +} + +func ContainsEvent(events []string, eventType string, event string) bool { + for _, e := range events { + if strings.Contains(e, eventType) && strings.Contains(e, event) { + return true } } + return false } var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { @@ -278,10 +284,31 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Expect(cond.Status).To(Equal(metav1.ConditionFalse)) Expect(cond.Reason).To(Equal("CNPGClusterProvisioning")) - // Simulate external CNPG controller status progression. + secretCond := meta.FindStatusCondition(pc.Status.Conditions, "SecretsReady") + Expect(secretCond).NotTo(BeNil()) + Expect(secretCond.Status).To(Equal(metav1.ConditionTrue)) + Expect(secretCond.Reason).To(Equal("SuperUserSecretReady")) + + configMapCond := meta.FindStatusCondition(pc.Status.Conditions, "ConfigMapsReady") + // ConfigMap converge runs in the runtime phase; at this point reconcile may + // still be returning from provisioner pending and not have written it yet. + Expect(configMapCond).To(BeNil()) + + // Simulate CNPG becoming healthy first, but without managed roles status published yet. 
cnpg := &cnpgv1.Cluster{} Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed()) cnpg.Status.Phase = cnpgv1.PhaseHealthy + Expect(k8sClient.Status().Update(ctx, cnpg)).To(Succeed()) + reconcileNTimes(1) + + Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed()) + managedRolesCond := meta.FindStatusCondition(pc.Status.Conditions, "ManagedRolesReady") + Expect(managedRolesCond).NotTo(BeNil()) + Expect(managedRolesCond.Status).To(Equal(metav1.ConditionFalse)) + Expect(managedRolesCond.Reason).To(Equal("ManagedRolesPending")) + + // Simulate external CNPG controller publishing managed roles status. + Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed()) cnpg.Status.ManagedRolesStatus = cnpgv1.ManagedRoles{ ByStatus: map[cnpgv1.RoleStatus][]string{ cnpgv1.RoleStatusReconciled: {"app_user", "app_user_rw"}, @@ -297,17 +324,17 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Expect(cond.Status).To(Equal(metav1.ConditionTrue)) Expect(cond.Reason).To(Equal("CNPGClusterHealthy")) - secretCond := meta.FindStatusCondition(pc.Status.Conditions, "SecretsReady") + secretCond = meta.FindStatusCondition(pc.Status.Conditions, "SecretsReady") Expect(secretCond).NotTo(BeNil()) Expect(secretCond.Status).To(Equal(metav1.ConditionTrue)) Expect(secretCond.Reason).To(Equal("SuperUserSecretReady")) - configMapCond := meta.FindStatusCondition(pc.Status.Conditions, "ConfigMapsReady") + configMapCond = meta.FindStatusCondition(pc.Status.Conditions, "ConfigMapsReady") Expect(configMapCond).NotTo(BeNil()) Expect(configMapCond.Status).To(Equal(metav1.ConditionTrue)) Expect(configMapCond.Reason).To(Equal("ConfigMapReconciled")) - managedRolesCond := meta.FindStatusCondition(pc.Status.Conditions, "ManagedRolesReady") + managedRolesCond = meta.FindStatusCondition(pc.Status.Conditions, "ManagedRolesReady") Expect(managedRolesCond).NotTo(BeNil()) Expect(managedRolesCond.Status).To(Equal(metav1.ConditionTrue)) Expect(managedRolesCond.Reason).To(Equal("ManagedRolesReconciled")) @@ -333,8 +360,13 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Expect(pc.Status.Resources.ConfigMapRef).NotTo(BeNil()) received := make([]string, 0, 8) - Expect(containsEvents( - &received, fakeRecorder, + CollectEvents(&received, fakeRecorder) + Expect(ContainsEvent( + received, + v1.EventTypeNormal, core.EventConfigMapReconciled, + )).To(BeTrue(), "events seen: %v", received) + Expect(ContainsEvent( + received, v1.EventTypeNormal, core.EventClusterReady, )).To(BeTrue(), "events seen: %v", received) }) @@ -442,6 +474,13 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Expect(apierrors.IsNotFound(k8sClient.Get(ctx, rwKey, &cnpgv1.Pooler{}))).To(BeTrue()) Expect(apierrors.IsNotFound(k8sClient.Get(ctx, roKey, &cnpgv1.Pooler{}))).To(BeTrue()) + pc := &enterprisev4.PostgresCluster{} + Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed()) + poolerCond := meta.FindStatusCondition(pc.Status.Conditions, "PoolerReady") + // Pooler component is gated behind provisioner readiness, so before CNPG + // becomes healthy the condition may not be written yet. 
+			Expect(poolerCond).To(BeNil())
+
 			cnpg := &cnpgv1.Cluster{}
 			Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed())
 			cnpg.Status.Phase = cnpgv1.PhaseHealthy
@@ -464,6 +503,28 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() {
 				g.Expect(ro.Spec.Template.ObjectMeta.Annotations).To(HaveKeyWithValue(scrapeAnnotationKey, "true"))
 				g.Expect(ro.Spec.Template.ObjectMeta.Annotations).To(HaveKeyWithValue(pathAnnotationKey, metricsPath))
 				g.Expect(ro.Spec.Template.ObjectMeta.Annotations).To(HaveKeyWithValue(portAnnotationKey, poolerPort))
+
+				// Simulate CNPG pooler controller publishing status progression.
+				if rw.Status.Instances < 2 {
+					rw.Status.Instances = 2
+					g.Expect(k8sClient.Status().Update(ctx, rw)).To(Succeed())
+				}
+				if ro.Status.Instances < 2 {
+					ro.Status.Instances = 2
+					g.Expect(k8sClient.Status().Update(ctx, ro)).To(Succeed())
+				}
+			}, "20s", "250ms").Should(Succeed())
+
+			Eventually(func(g Gomega) {
+				_, err := reconciler.Reconcile(ctx, req)
+				g.Expect(err).NotTo(HaveOccurred())
+
+				updated := &enterprisev4.PostgresCluster{}
+				g.Expect(k8sClient.Get(ctx, pgClusterKey, updated)).To(Succeed())
+				poolerReadyCond := meta.FindStatusCondition(updated.Status.Conditions, "PoolerReady")
+				g.Expect(poolerReadyCond).NotTo(BeNil())
+				g.Expect(poolerReadyCond.Status).To(Equal(metav1.ConditionTrue))
+				g.Expect(poolerReadyCond.Reason).To(Equal("AllInstancesReady"))
 			}, "20s", "250ms").Should(Succeed())
 		})
 	})
@@ -543,8 +604,9 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() {
 			}, "20s", "250ms").Should(BeTrue())
 
 			received := make([]string, 0, 8)
-			Expect(containsEvents(
-				&received, fakeRecorder,
+			CollectEvents(&received, fakeRecorder)
+			Expect(ContainsEvent(
+				received,
 				v1.EventTypeWarning, core.EventClusterClassNotFound,
 			)).To(BeTrue(), "events seen: %v", received)
 		})
@@ -566,5 +628,69 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() {
 			Expect(cnpg.Spec.Instances).To(Equal(int(clusterMemberCount)))
 		})
 	})
+
+	Context("when a configmap spec changes", func() {
+		BeforeEach(func() {
+			// Keep this test focused on ConfigMap behavior; otherwise reconcile can
+			// stop on ManagedRolesPending before ConfigMap status is written.
+			pgCluster.Spec.ManagedRoles = nil
+		})
+
+		It("emits ConfigMapReconciled event on configmap update", func() {
+			Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed())
+			reconcileNTimes(2)
+
+			// Mark the CNPG cluster healthy so the runtime phase (including ConfigMap converge) can proceed.
+			cnpg := &cnpgv1.Cluster{}
+			Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed())
+			cnpg.Status.Phase = cnpgv1.PhaseHealthy
+			Expect(k8sClient.Status().Update(ctx, cnpg)).To(Succeed())
+			reconcileNTimes(1)
+
+			// Drain baseline events so we don't match the initial "created" event.
+			received := make([]string, 0, 16)
+			CollectEvents(&received, fakeRecorder)
+			received = received[:0]
+
+			// Drift the managed ConfigMap.
+			pc := &enterprisev4.PostgresCluster{}
+			Eventually(func() bool {
+				if err := k8sClient.Get(ctx, pgClusterKey, pc); err != nil {
+					return false
+				}
+				return pc.Status.Resources != nil && pc.Status.Resources.ConfigMapRef != nil
+			}, "5s", "100ms").Should(BeTrue())
+
+			cmKey := types.NamespacedName{
+				Name:      pc.Status.Resources.ConfigMapRef.Name,
+				Namespace: namespace,
+			}
+			cm := &v1.ConfigMap{}
+			Expect(k8sClient.Get(ctx, cmKey, cm)).To(Succeed())
+			delete(cm.Data, "CLUSTER_RW_ENDPOINT") // force a spec drift so the next reconcile updates the ConfigMap
+			Expect(k8sClient.Update(ctx, cm)).To(Succeed())
+
+			// Reconcile and assert the updated event.
+ reconcileNTimes(1) + + Eventually(func() bool { + CollectEvents(&received, fakeRecorder) + + // reason match + if !ContainsEvent(received, v1.EventTypeNormal, core.EventConfigMapReconciled) { + return false + } + // message-level match for update (not create) + for _, e := range received { + if strings.Contains(e, v1.EventTypeNormal) && + strings.Contains(e, core.EventConfigMapReconciled) && + strings.Contains(e, "updated") { + return true + } + } + return false + }, "5s", "100ms").Should(BeTrue(), "events seen: %v", received) + }) + }) }) }) diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index f0efc1926..aa566b9bd 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -24,6 +24,7 @@ import ( "strings" cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" + "github.com/go-logr/logr" password "github.com/sethvargo/go-password/password" enterprisev4 "github.com/splunk/splunk-operator/api/v4" pgcConstants "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core/types/constants" @@ -165,49 +166,92 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. secretComponent := newSecretModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, postgresSecretName) clusterComponent := newClusterModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, clusterClass, mergedConfig, postgresSecretName) - bootstrapComponents := []component{ - secretComponent, - clusterComponent, + bootstrapManager := &componentManager{ + components: []component{ + secretComponent, + clusterComponent, + }, + logger: logger, + } + result, err := bootstrapManager.Handle(ctx) + if err != nil { + return result, err + } + if result != (ctrl.Result{}) { + return result, nil } - phase := func(component component) (ctrl.Result, error) { - componentLogger := logger.WithValues("component", component.Name()) - gate, gateErr := component.EvaluatePrerequisites(ctx) - if gateErr != nil { - if isTransientError(gateErr) { - componentLogger.Error(gateErr, "Component prerequisite transient error, requeueing", "step", "prerequisites") - return transientResult(gateErr), nil - } - componentLogger.Error(gateErr, "Component prerequisite evaluation failed", "step", "prerequisites") - return ctrl.Result{}, fmt.Errorf("%s prerequisites: %w", component.Name(), gateErr) + cnpgCluster = clusterComponent.cnpgCluster + runtimeView := clusterRuntimeViewAdapter{model: clusterComponent} + + runtimeManager := &componentManager{ + components: []component{ + newManagedRolesModel(c, rc.Scheme, rc, updateComponentHealthStatus, runtimeView, postgresCluster, postgresSecretName), + newPoolerModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, clusterClass, mergedConfig, cnpgCluster, poolerEnabled, poolerConfigPresent), + newConfigMapModel(c, rc.Scheme, rc, updateComponentHealthStatus, runtimeView, postgresCluster, postgresSecretName), + }, + logger: logger, + } + + result, err = runtimeManager.Handle(ctx) + if err != nil { + return result, err + } + if result != (ctrl.Result{}) { + return result, nil + } + + logger.Info("Reconciliation complete") + if err := updatePhaseStatus(readyClusterPhase); err != nil { + if apierrors.IsConflict(err) { + return ctrl.Result{Requeue: true}, nil } - if !gate.Allowed { + return ctrl.Result{}, err + } + return ctrl.Result{}, nil +} + +func isTransientError(err error) bool { + return apierrors.IsConflict(err) || + apierrors.IsServerTimeout(err) || + 
apierrors.IsTooManyRequests(err) || + apierrors.IsTimeout(err) +} + +func transientResult(err error) ctrl.Result { + if apierrors.IsConflict(err) { + return ctrl.Result{Requeue: true} + } + return ctrl.Result{RequeueAfter: retryDelay} +} + +func writeComponentStatus(updateStatus healthStatusUpdater, health componentHealth) error { + if updateStatus == nil { + return nil + } + return updateStatus(health) +} + +type componentManager struct { + components []component + logger logr.Logger +} + +func (m *componentManager) Handle(ctx context.Context) (ctrl.Result, error) { + for _, component := range m.components { + componentLogger := m.logger.WithValues("component", component.Name()) + gate := component.EvaluatePrerequisites(ctx) + + if gate.Allowed { + component.Actuate(ctx) + } else { componentLogger.Info("Component blocked by prerequisites", "step", "prerequisites", "condition", gate.Health.Condition, "reason", gate.Health.Reason, "phase", gate.Health.Phase, "requeueAfter", gate.Health.Result.RequeueAfter) - health, err := component.Converge(ctx) - if err != nil && isTransientError(err) { - return transientResult(err), nil - } - if err != nil { - componentLogger.Error(err, "Blocked component convergence failed", "step", "converge") - return health.Result, fmt.Errorf("%s converge (blocked): %w", component.Name(), err) - } - return health.Result, nil - } - - if err := component.Actuate(ctx); err != nil { - if isTransientError(err) { - componentLogger.Error(err, "Component actuation transient error, requeueing", "step", "actuate") - return transientResult(err), nil - } - componentLogger.Error(err, "Component actuation failed", "step", "actuate") - return ctrl.Result{}, fmt.Errorf("%s actuate: %w", component.Name(), err) } - componentLogger.Info("Component actuation completed", "step", "actuate") health, err := component.Converge(ctx) if err != nil && isTransientError(err) { @@ -243,67 +287,10 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
"requeueAfter", health.Result.RequeueAfter) return health.Result, nil } - return ctrl.Result{}, nil - } - - for _, component := range bootstrapComponents { - result, err := phase(component) - if err != nil { - return result, err - } - if result != (ctrl.Result{}) { - return result, nil - } - } - - cnpgCluster = clusterComponent.cnpgCluster - runtimeView := clusterRuntimeViewAdapter{model: clusterComponent} - runtimeComponents := []component{ - newManagedRolesModel(c, rc.Scheme, rc, updateComponentHealthStatus, runtimeView, postgresCluster, postgresSecretName), - newPoolerModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, clusterClass, mergedConfig, cnpgCluster, poolerEnabled, poolerConfigPresent), - newConfigMapModel(c, rc.Scheme, rc, updateComponentHealthStatus, runtimeView, postgresCluster, postgresSecretName), - } - - for _, component := range runtimeComponents { - result, err := phase(component) - if err != nil { - return result, err - } - if result != (ctrl.Result{}) { - return result, nil - } - } - logger.Info("Reconciliation complete") - if err := updatePhaseStatus(readyClusterPhase); err != nil { - if apierrors.IsConflict(err) { - return ctrl.Result{Requeue: true}, nil - } - return ctrl.Result{}, err } return ctrl.Result{}, nil } -func isTransientError(err error) bool { - return apierrors.IsConflict(err) || - apierrors.IsServerTimeout(err) || - apierrors.IsTooManyRequests(err) || - apierrors.IsTimeout(err) -} - -func transientResult(err error) ctrl.Result { - if apierrors.IsConflict(err) { - return ctrl.Result{Requeue: true} - } - return ctrl.Result{RequeueAfter: retryDelay} -} - -func writeComponentStatus(updateStatus healthStatusUpdater, health componentHealth) error { - if updateStatus == nil { - return nil - } - return updateStatus(health) -} - // types/dto candidate type componentHealth struct { State pgcConstants.State @@ -315,9 +302,9 @@ type componentHealth struct { } type component interface { - Actuate(ctx context.Context) error + Actuate(ctx context.Context) Converge(ctx context.Context) (componentHealth, error) - EvaluatePrerequisites(ctx context.Context) (prerequisiteDecision, error) + EvaluatePrerequisites(ctx context.Context) prerequisiteDecision Name() string } @@ -371,6 +358,7 @@ type clusterModel struct { metricsEnabled bool health componentHealth + actuateErr error } func newClusterModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, cluster *enterprisev4.PostgresCluster, clusterClass *enterprisev4.PostgresClusterClass, mergedConfig *MergedConfig, secretName string) *clusterModel { @@ -386,17 +374,18 @@ func newClusterModel(c client.Client, scheme *runtime.Scheme, events eventEmitte func (p *clusterModel) Name() string { return pgcConstants.ComponentProvisioner } -func (p *clusterModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { +func (p *clusterModel) EvaluatePrerequisites(_ context.Context) prerequisiteDecision { if health, missing := p.getHealthOnMissingSecretRef(); missing { return prerequisiteDecision{ Allowed: false, Health: health, - }, nil + } } - return prerequisiteDecision{Allowed: true}, nil + return prerequisiteDecision{Allowed: true} } -func (p *clusterModel) Actuate(ctx context.Context) error { +func (p *clusterModel) Actuate(ctx context.Context) { + p.actuateErr = nil p.cnpgCreated = false p.cnpgPatched = false @@ -408,17 +397,35 @@ func (p *clusterModel) Actuate(ctx context.Context) error { newCluster, err := buildCNPGCluster(p.scheme, p.cluster, 
p.mergedConfig, p.secretName, p.metricsEnabled) if err != nil { p.events.emitWarning(p.cluster, EventClusterCreateFailed, fmt.Sprintf("Failed to build CNPG cluster: %v", err)) - return err + p.health.State = pgcConstants.Failed + p.health.Reason = reasonClusterBuildFailed + p.health.Message = fmt.Sprintf("Failed to build CNPG cluster: %v", err) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + p.actuateErr = err + return } if err = p.client.Create(ctx, newCluster); err != nil { p.events.emitWarning(p.cluster, EventClusterCreateFailed, fmt.Sprintf("Failed to create CNPG cluster: %v", err)) - return err + p.health.State = pgcConstants.Failed + p.health.Reason = reasonClusterBuildFailed + p.health.Message = fmt.Sprintf("Failed to create CNPG cluster: %v", err) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + p.actuateErr = err + return } p.events.emitNormal(p.cluster, EventClusterCreationStarted, "CNPG cluster created, waiting for healthy state") p.cnpgCluster = newCluster p.cnpgCreated = true case err != nil: - return err + p.health.State = pgcConstants.Failed + p.health.Reason = reasonClusterGetFailed + p.health.Message = fmt.Sprintf("Failed to get CNPG cluster: %v", err) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + p.actuateErr = err + return default: p.cnpgCluster = existingCNPG currentNormalized := normalizeCNPGClusterSpec(p.cnpgCluster.Spec, p.mergedConfig.Spec.PostgreSQLConfig) @@ -428,7 +435,13 @@ func (p *clusterModel) Actuate(ctx context.Context) error { p.cnpgCluster.Spec = desiredSpec if patchErr := patchObject(ctx, p.client, originalCluster, p.cnpgCluster, "CNPGCluster"); patchErr != nil { p.events.emitWarning(p.cluster, EventClusterUpdateFailed, fmt.Sprintf("Failed to patch CNPG cluster: %v", patchErr)) - return patchErr + p.health.State = pgcConstants.Failed + p.health.Reason = reasonClusterPatchFailed + p.health.Message = fmt.Sprintf("Failed to patch CNPG cluster: %v", patchErr) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + p.actuateErr = patchErr + return } p.events.emitNormal(p.cluster, EventClusterUpdateStarted, "CNPG cluster spec updated, waiting for healthy state") p.cnpgPatched = true @@ -444,7 +457,7 @@ func (p *clusterModel) Actuate(ctx context.Context) error { UID: p.cnpgCluster.UID, } } - return nil + return } func (p *clusterModel) Converge(_ context.Context) (health componentHealth, err error) { @@ -465,6 +478,9 @@ func (p *clusterModel) Converge(_ context.Context) (health componentHealth, err p.health = missingHealth return p.health, nil } + if p.actuateErr != nil { + return p.health, p.actuateErr + } if p.cnpgCluster == nil { p.health.State = pgcConstants.Pending @@ -625,7 +641,8 @@ type managedRolesModel struct { cluster *enterprisev4.PostgresCluster secret string - health componentHealth + health componentHealth + actuateErr error } func newManagedRolesModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, runtime clusterRuntimeView, cluster *enterprisev4.PostgresCluster, secret string) *managedRolesModel { @@ -648,17 +665,18 @@ func (m *managedRolesModel) runtimeGateHealth() (componentHealth, bool) { return componentHealth{}, false } -func (m *managedRolesModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { +func (m *managedRolesModel) EvaluatePrerequisites(_ context.Context) prerequisiteDecision { if gateHealth, blocked := m.runtimeGateHealth(); blocked { return 
prerequisiteDecision{ Allowed: false, Health: gateHealth, - }, nil + } } - return prerequisiteDecision{Allowed: true}, nil + return prerequisiteDecision{Allowed: true} } -func (m *managedRolesModel) Actuate(ctx context.Context) error { +func (m *managedRolesModel) Actuate(ctx context.Context) { + m.actuateErr = nil if rolesErr := reconcileManagedRoles(ctx, m.client, m.cluster, m.runtime.Cluster()); rolesErr != nil { m.events.emitWarning(m.cluster, EventManagedRolesFailed, fmt.Sprintf("Failed to reconcile managed roles: %v", rolesErr)) m.health.State = pgcConstants.Failed @@ -666,14 +684,15 @@ func (m *managedRolesModel) Actuate(ctx context.Context) error { m.health.Message = fmt.Sprintf("Failed to reconcile managed roles: %v", rolesErr) m.health.Phase = failedClusterPhase m.health.Result = ctrl.Result{} - return rolesErr + m.actuateErr = rolesErr + return } - return nil + return } func (m *managedRolesModel) Converge(ctx context.Context) (health componentHealth, err error) { _ = ctx - m.health = componentHealth{Condition: managedRolesReady} + m.health.Condition = managedRolesReady defer func() { statusErr := writeComponentStatus(m.updateStatus, m.health) if statusErr != nil { @@ -690,6 +709,9 @@ func (m *managedRolesModel) Converge(ctx context.Context) (health componentHealt m.health = gateHealth return m.health, nil } + if m.actuateErr != nil { + return m.health, m.actuateErr + } syncManagedRolesStatusFromCNPG(m.cluster, m.runtime.Cluster()) status := m.cluster.Status.ManagedRolesStatus @@ -817,6 +839,7 @@ type poolerModel struct { metricsEnabled bool health componentHealth + actuateErr error } func newPoolerModel(c client.Client, scheme *runtime.Scheme, events poolerEmitter, updateStatus healthStatusUpdater, cluster *enterprisev4.PostgresCluster, clusterClass *enterprisev4.PostgresClusterClass, mergedConfig *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerEnabled bool, poolerConfigPresent bool) *poolerModel { @@ -838,9 +861,9 @@ func newPoolerModel(c client.Client, scheme *runtime.Scheme, events poolerEmitte func (p *poolerModel) Name() string { return pgcConstants.ComponentPooler } -func (p *poolerModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { +func (p *poolerModel) EvaluatePrerequisites(_ context.Context) prerequisiteDecision { if !p.poolerEnabled || !p.poolerConfigPresent { - return prerequisiteDecision{Allowed: true}, nil + return prerequisiteDecision{Allowed: true} } if p.cnpgCluster == nil { return prerequisiteDecision{ @@ -853,7 +876,7 @@ func (p *poolerModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDeci Phase: pendingClusterPhase, Result: ctrl.Result{RequeueAfter: retryDelay}, }, - }, nil + } } if p.cnpgCluster.Status.Phase != cnpgv1.PhaseHealthy { return prerequisiteDecision{ @@ -866,35 +889,49 @@ func (p *poolerModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDeci Phase: provisioningClusterPhase, Result: ctrl.Result{RequeueAfter: retryDelay}, }, - }, nil + } } - return prerequisiteDecision{Allowed: true}, nil + return prerequisiteDecision{Allowed: true} } -func (p *poolerModel) Actuate(ctx context.Context) error { +func (p *poolerModel) Actuate(ctx context.Context) { + p.actuateErr = nil switch { case !p.poolerEnabled: if err := deleteConnectionPoolers(ctx, p.client, p.cluster); err != nil { - return err + p.health.State = pgcConstants.Failed + p.health.Reason = reasonPoolerReconciliationFailed + p.health.Message = fmt.Sprintf("Failed to delete poolers: %v", err) + p.health.Phase = failedClusterPhase + p.health.Result 
= ctrl.Result{} + p.actuateErr = err + return } p.cluster.Status.ConnectionPoolerStatus = nil meta.RemoveStatusCondition(&p.cluster.Status.Conditions, string(poolerReady)) - return nil + return case !p.poolerConfigPresent: - return nil + return case p.cnpgCluster == nil || p.cnpgCluster.Status.Phase != cnpgv1.PhaseHealthy: - return nil + return default: if err := createOrUpdateConnectionPoolers(ctx, p.client, p.scheme, p.cluster, p.mergedConfig, p.cnpgCluster, p.metricsEnabled); err != nil { p.events.emitWarning(p.cluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to reconcile connection pooler: %v", err)) - return err + p.health.State = pgcConstants.Failed + p.health.Reason = reasonPoolerReconciliationFailed + p.health.Message = fmt.Sprintf("Failed to reconcile connection pooler: %v", err) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + p.actuateErr = err + return } - return nil + return } } func (p *poolerModel) Converge(ctx context.Context) (health componentHealth, err error) { - p.health = componentHealth{Condition: poolerReady} + p.health.Condition = poolerReady + oldConditions := append([]metav1.Condition(nil), p.cluster.Status.Conditions...) defer func() { statusErr := writeComponentStatus(p.updateStatus, p.health) if statusErr != nil { @@ -923,6 +960,9 @@ func (p *poolerModel) Converge(ctx context.Context) (health componentHealth, err p.health.Result = ctrl.Result{} return p.health, fmt.Errorf("pooler config missing") } + if p.actuateErr != nil { + return p.health, p.actuateErr + } if p.cnpgCluster == nil { p.health.State = pgcConstants.Pending p.health.Reason = reasonCNPGProvisioning @@ -1013,7 +1053,7 @@ func (p *poolerModel) Converge(ctx context.Context) (health componentHealth, err p.health.Message = msgPoolersReady p.health.Phase = readyClusterPhase p.health.Result = ctrl.Result{} - p.events.emitPoolerReadyTransition(p.cluster, p.cluster.Status.Conditions) + p.events.emitPoolerReadyTransition(p.cluster, oldConditions) return p.health, nil } @@ -1026,7 +1066,8 @@ type configMapModel struct { cluster *enterprisev4.PostgresCluster secret string - health componentHealth + health componentHealth + actuateErr error } func newConfigMapModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, runtime clusterRuntimeView, cluster *enterprisev4.PostgresCluster, secret string) *configMapModel { @@ -1035,22 +1076,29 @@ func newConfigMapModel(c client.Client, scheme *runtime.Scheme, events eventEmit func (c *configMapModel) Name() string { return pgcConstants.ComponentConfigMap } -func (c *configMapModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { - return prerequisiteDecision{Allowed: true}, nil +func (c *configMapModel) EvaluatePrerequisites(_ context.Context) prerequisiteDecision { + return prerequisiteDecision{Allowed: true} } -func (c *configMapModel) Actuate(ctx context.Context) error { +func (c *configMapModel) Actuate(ctx context.Context) { + c.actuateErr = nil cnpgCluster := c.runtime.Cluster() if cnpgCluster == nil { - return nil + return } desiredCM, err := generateConfigMap(ctx, c.client, c.scheme, c.cluster, cnpgCluster, c.secret) if err != nil { c.events.emitWarning(c.cluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) - return err + c.health.State = pgcConstants.Failed + c.health.Reason = reasonConfigMapFailed + c.health.Message = fmt.Sprintf("Failed to reconcile ConfigMap: %v", err) + c.health.Phase = failedClusterPhase + 
c.health.Result = ctrl.Result{} + c.actuateErr = err + return } cm := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: desiredCM.Name, Namespace: desiredCM.Namespace}} - _, err = controllerutil.CreateOrUpdate(ctx, c.client, cm, func() error { + op, err := controllerutil.CreateOrUpdate(ctx, c.client, cm, func() error { cm.Data = desiredCM.Data cm.Annotations = desiredCM.Annotations cm.Labels = desiredCM.Labels @@ -1063,16 +1111,27 @@ func (c *configMapModel) Actuate(ctx context.Context) error { }) if err != nil { c.events.emitWarning(c.cluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) - return err + c.health.State = pgcConstants.Failed + c.health.Reason = reasonConfigMapFailed + c.health.Message = fmt.Sprintf("Failed to reconcile ConfigMap: %v", err) + c.health.Phase = failedClusterPhase + c.health.Result = ctrl.Result{} + c.actuateErr = err + return + } + if op == controllerutil.OperationResultCreated { + c.events.emitNormal(c.cluster, EventConfigMapReconciled, fmt.Sprintf("ConfigMap %s created", desiredCM.Name)) + } else if op == controllerutil.OperationResultUpdated { + c.events.emitNormal(c.cluster, EventConfigMapReconciled, fmt.Sprintf("ConfigMap %s updated", desiredCM.Name)) } if c.cluster.Status.Resources.ConfigMapRef == nil { c.cluster.Status.Resources.ConfigMapRef = &corev1.LocalObjectReference{Name: desiredCM.Name} } - return nil + return } func (c *configMapModel) Converge(ctx context.Context) (health componentHealth, err error) { - c.health = componentHealth{Condition: configMapsReady} + c.health.Condition = configMapsReady defer func() { statusErr := writeComponentStatus(c.updateStatus, c.health) if statusErr != nil { @@ -1093,6 +1152,9 @@ func (c *configMapModel) Converge(ctx context.Context) (health componentHealth, c.health.Result = ctrl.Result{RequeueAfter: retryDelay} return c.health, nil } + if c.actuateErr != nil { + return c.health, c.actuateErr + } if c.cluster.Status.Resources == nil || c.cluster.Status.Resources.ConfigMapRef == nil { c.health.State = pgcConstants.Provisioning @@ -1159,7 +1221,8 @@ type secretModel struct { cluster *enterprisev4.PostgresCluster name string - health componentHealth + health componentHealth + actuateErr error } func newSecretModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, cluster *enterprisev4.PostgresCluster, name string) *secretModel { @@ -1168,35 +1231,66 @@ func newSecretModel(c client.Client, scheme *runtime.Scheme, events eventEmitter func (s *secretModel) Name() string { return pgcConstants.ComponentSecret } -func (s *secretModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { - return prerequisiteDecision{Allowed: true}, nil +func (s *secretModel) EvaluatePrerequisites(_ context.Context) prerequisiteDecision { + return prerequisiteDecision{Allowed: true} } -func (s *secretModel) Actuate(ctx context.Context) error { +func (s *secretModel) Actuate(ctx context.Context) { + s.actuateErr = nil secret := &corev1.Secret{} secretExists, secretErr := clusterSecretExists(ctx, s.client, s.cluster.Namespace, s.name, secret) if secretErr != nil { s.events.emitWarning(s.cluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to check secret existence: %v", secretErr)) - return secretErr + s.health.State = pgcConstants.Failed + s.health.Reason = reasonSuperUserSecretFailed + s.health.Message = fmt.Sprintf("Failed to check secret existence: %v", secretErr) + s.health.Phase = failedClusterPhase + s.health.Result = 
ctrl.Result{} + s.actuateErr = secretErr + return } if !secretExists { if err := ensureClusterSecret(ctx, s.client, s.scheme, s.cluster, s.name, secret); err != nil { s.events.emitWarning(s.cluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to generate cluster secret: %v", err)) - return err + s.health.State = pgcConstants.Failed + s.health.Reason = reasonSuperUserSecretFailed + s.health.Message = fmt.Sprintf("Failed to generate cluster secret: %v", err) + s.health.Phase = failedClusterPhase + s.health.Result = ctrl.Result{} + s.actuateErr = err + return } } hasOwnerRef, ownerRefErr := controllerutil.HasOwnerReference(secret.GetOwnerReferences(), s.cluster, s.scheme) if ownerRefErr != nil { - return fmt.Errorf("failed to check owner reference on secret: %w", ownerRefErr) + s.health.State = pgcConstants.Failed + s.health.Reason = reasonSuperUserSecretFailed + s.health.Message = fmt.Sprintf("failed to check owner reference on secret: %v", ownerRefErr) + s.health.Phase = failedClusterPhase + s.health.Result = ctrl.Result{} + s.actuateErr = fmt.Errorf("failed to check owner reference on secret: %w", ownerRefErr) + return } if secretExists && !hasOwnerRef { originalSecret := secret.DeepCopy() if err := ctrl.SetControllerReference(s.cluster, secret, s.scheme); err != nil { - return fmt.Errorf("failed to set controller reference on existing secret: %w", err) + s.health.State = pgcConstants.Failed + s.health.Reason = reasonSuperUserSecretFailed + s.health.Message = fmt.Sprintf("failed to set controller reference on existing secret: %v", err) + s.health.Phase = failedClusterPhase + s.health.Result = ctrl.Result{} + s.actuateErr = fmt.Errorf("failed to set controller reference on existing secret: %w", err) + return } if err := patchObject(ctx, s.client, originalSecret, secret, "Secret"); err != nil { s.events.emitWarning(s.cluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to patch existing secret: %v", err)) - return err + s.health.State = pgcConstants.Failed + s.health.Reason = reasonSuperUserSecretFailed + s.health.Message = fmt.Sprintf("Failed to patch existing secret: %v", err) + s.health.Phase = failedClusterPhase + s.health.Result = ctrl.Result{} + s.actuateErr = err + return } s.events.emitNormal(s.cluster, EventClusterAdopted, fmt.Sprintf("Adopted existing CNPG cluster and secret %s", s.name)) } @@ -1206,11 +1300,11 @@ func (s *secretModel) Actuate(ctx context.Context) error { Key: secretKeyPassword, } } - return nil + return } func (s *secretModel) Converge(ctx context.Context) (health componentHealth, err error) { - s.health = componentHealth{Condition: secretsReady} + s.health.Condition = secretsReady defer func() { statusErr := writeComponentStatus(s.updateStatus, s.health) if statusErr != nil { @@ -1223,6 +1317,10 @@ func (s *secretModel) Converge(ctx context.Context) (health componentHealth, err health = s.health }() + if s.actuateErr != nil { + return s.health, s.actuateErr + } + if s.cluster.Status.Resources == nil || s.cluster.Status.Resources.SuperUserSecretRef == nil { s.health.State = pgcConstants.Provisioning s.health.Reason = reasonUserSecretPending diff --git a/pkg/postgresql/cluster/core/cluster_unit_test.go b/pkg/postgresql/cluster/core/cluster_unit_test.go index 85eeba7e2..6df965cc6 100644 --- a/pkg/postgresql/cluster/core/cluster_unit_test.go +++ b/pkg/postgresql/cluster/core/cluster_unit_test.go @@ -25,6 +25,41 @@ type configMapNotFoundClient struct { client.Client } +type getErrorClient struct { + client.Client + err error + matcher func(client.Object) bool 
+} + +func (c getErrorClient) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + if c.matcher != nil && c.matcher(obj) { + return c.err + } + return c.Client.Get(ctx, key, obj, opts...) +} + +type createErrorClient struct { + client.Client + err error + matcher func(client.Object) bool +} + +func (c createErrorClient) Create(ctx context.Context, obj client.Object, opts ...client.CreateOption) error { + if c.matcher != nil && c.matcher(obj) { + return c.err + } + return c.Client.Create(ctx, obj, opts...) +} + +type patchErrorClient struct { + client.Client + err error +} + +func (c patchErrorClient) Patch(_ context.Context, _ client.Object, _ client.Patch, _ ...client.PatchOption) error { + return c.err +} + type noopEventEmitter struct{} func (noopEventEmitter) emitNormal(_ client.Object, _, _ string) {} @@ -1290,10 +1325,11 @@ func TestComponentStateTriggerConditions(t *testing.T) { Namespace: "default", }, Data: map[string]string{ - "CLUSTER_RW_ENDPOINT": "pg1-rw.default", - "CLUSTER_RO_ENDPOINT": "pg1-ro.default", - "DEFAULT_CLUSTER_PORT": "5432", - "SUPER_USER_SECRET_REF": "pg1-secret", + configKeyClusterRWEndpoint: "pg1-rw.default", + configKeyClusterROEndpoint: "pg1-ro.default", + configKeyClusterREndpoint: "pg1-r.default", + configKeyDefaultClusterPort: "5432", + configKeySuperUserSecretRef: "pg1-secret", }, } examplePgCluster := &enterprisev4.PostgresCluster{ @@ -1487,8 +1523,7 @@ func TestComponentStateTriggerConditions(t *testing.T) { state := pgcConstants.Empty for i, check := range tt.components { - gate, gateErr := check.EvaluatePrerequisites(ctx) - require.NoError(t, gateErr) + gate := check.EvaluatePrerequisites(ctx) if !gate.Allowed { info := gate.Health state = info.State @@ -1497,7 +1532,7 @@ func TestComponentStateTriggerConditions(t *testing.T) { continue } - require.NoError(t, check.Actuate(ctx)) + check.Actuate(ctx) info, err := check.Converge(ctx) require.NoError(t, err) state = info.State @@ -1742,8 +1777,7 @@ func TestManagedRolesRuntimeGateHealthMatchesConverge(t *testing.T) { "pg1-secret", ) - gate, err := model.EvaluatePrerequisites(context.Background()) - require.NoError(t, err) + gate := model.EvaluatePrerequisites(context.Background()) require.False(t, gate.Allowed) health, err := model.Converge(context.Background()) @@ -1751,6 +1785,229 @@ func TestManagedRolesRuntimeGateHealthMatchesConverge(t *testing.T) { assert.Equal(t, gate.Health, health) } +func TestActuateErrorPassdownConvergeHandling(t *testing.T) { + t.Parallel() + + scheme := runtime.NewScheme() + require.NoError(t, enterprisev4.AddToScheme(scheme)) + require.NoError(t, cnpgv1.AddToScheme(scheme)) + require.NoError(t, corev1.AddToScheme(scheme)) + + instances := int32(1) + version := "16" + storageSize := resource.MustParse("10Gi") + mergedConfig := &MergedConfig{ + Spec: &enterprisev4.PostgresClusterSpec{ + Instances: &instances, + PostgresVersion: &version, + Storage: &storageSize, + Resources: &corev1.ResourceRequirements{}, + PostgreSQLConfig: map[string]string{}, + PgHBA: []string{}, + }, + } + clusterClass := &enterprisev4.PostgresClusterClass{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1-class"}, + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: ptr.To(true), + }, + }, + } + + type convergeComponent interface { + Actuate(ctx context.Context) + Converge(ctx context.Context) (componentHealth, error) + } + type testCase struct { + name string + expectedCondition 
conditionTypes + expectedReason conditionReasons + build func(updateStatus healthStatusUpdater) convergeComponent + } + + tests := []testCase{ + { + name: "cluster component passes actuate get error through converge", + expectedCondition: clusterReady, + expectedReason: reasonClusterGetFailed, + build: func(updateStatus healthStatusUpdater) convergeComponent { + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Status: enterprisev4.PostgresClusterStatus{ + Resources: &enterprisev4.PostgresClusterResources{ + SuperUserSecretRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "pg1-secret"}, + Key: "password", + }, + }, + }, + } + base := fake.NewClientBuilder().WithScheme(scheme).Build() + errClient := getErrorClient{ + Client: base, + err: assert.AnError, + matcher: func(obj client.Object) bool { + _, ok := obj.(*cnpgv1.Cluster) + return ok + }, + } + return newClusterModel(errClient, scheme, noopEventEmitter{}, updateStatus, cluster, clusterClass, mergedConfig, "pg1-secret") + }, + }, + { + name: "managed roles component passes actuate patch error through converge", + expectedCondition: managedRolesReady, + expectedReason: reasonManagedRolesFailed, + build: func(updateStatus healthStatusUpdater) convergeComponent { + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Spec: enterprisev4.PostgresClusterSpec{ + ManagedRoles: []enterprisev4.ManagedRole{{Name: "app_user", Exists: true}}, + }, + } + cnpg := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}, + } + base := fake.NewClientBuilder().WithScheme(scheme).Build() + errClient := patchErrorClient{Client: base, err: assert.AnError} + return newManagedRolesModel( + errClient, + scheme, + noopEventEmitter{}, + updateStatus, + clusterRuntimeViewAdapter{model: &clusterModel{cnpgCluster: cnpg}}, + cluster, + "pg1-secret", + ) + }, + }, + { + name: "pooler component passes actuate create error through converge", + expectedCondition: poolerReady, + expectedReason: reasonPoolerReconciliationFailed, + build: func(updateStatus healthStatusUpdater) convergeComponent { + poolerInstances := int32(2) + poolerMode := enterprisev4.ConnectionPoolerModeTransaction + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + } + cnpg := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}, + } + base := fake.NewClientBuilder().WithScheme(scheme).Build() + errClient := createErrorClient{ + Client: base, + err: assert.AnError, + matcher: func(obj client.Object) bool { + _, ok := obj.(*cnpgv1.Pooler) + return ok + }, + } + poolerCfg := &MergedConfig{ + Spec: mergedConfig.Spec, + CNPG: &enterprisev4.CNPGConfig{ + ConnectionPooler: &enterprisev4.ConnectionPoolerConfig{ + Instances: &poolerInstances, + Mode: &poolerMode, + Config: map[string]string{}, + }, + }, + } + return newPoolerModel(errClient, scheme, noopEventEmitter{}, updateStatus, cluster, clusterClass, poolerCfg, cnpg, true, true) + }, + }, + { + name: "configmap component passes actuate pooler lookup error through converge", + expectedCondition: configMapsReady, + expectedReason: reasonConfigMapFailed, + build: func(updateStatus healthStatusUpdater) convergeComponent { + cluster := &enterprisev4.PostgresCluster{ 
+ ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Status: enterprisev4.PostgresClusterStatus{Resources: &enterprisev4.PostgresClusterResources{}}, + } + cnpg := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}, + } + base := fake.NewClientBuilder().WithScheme(scheme).Build() + errClient := getErrorClient{ + Client: base, + err: assert.AnError, + matcher: func(obj client.Object) bool { + _, ok := obj.(*cnpgv1.Pooler) + return ok + }, + } + return newConfigMapModel( + errClient, + scheme, + noopEventEmitter{}, + updateStatus, + clusterRuntimeViewAdapter{model: &clusterModel{cnpgCluster: cnpg}}, + cluster, + "pg1-secret", + ) + }, + }, + { + name: "secret component passes actuate existence-check error through converge", + expectedCondition: secretsReady, + expectedReason: reasonSuperUserSecretFailed, + build: func(updateStatus healthStatusUpdater) convergeComponent { + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Status: enterprisev4.PostgresClusterStatus{ + Resources: &enterprisev4.PostgresClusterResources{}, + }, + } + base := fake.NewClientBuilder().WithScheme(scheme).Build() + errClient := getErrorClient{ + Client: base, + err: assert.AnError, + matcher: func(obj client.Object) bool { + _, ok := obj.(*corev1.Secret) + return ok + }, + } + return newSecretModel(errClient, scheme, noopEventEmitter{}, updateStatus, cluster, "pg1-secret") + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + var ( + written componentHealth + writes int + ) + updateStatus := func(health componentHealth) error { + written = health + writes++ + return nil + } + model := tt.build(updateStatus) + + model.Actuate(context.Background()) + health, err := model.Converge(context.Background()) + + require.Error(t, err) + require.ErrorIs(t, err, assert.AnError) + assert.Equal(t, tt.expectedCondition, health.Condition) + assert.Equal(t, pgcConstants.Failed, health.State) + assert.Equal(t, tt.expectedReason, health.Reason) + assert.Equal(t, failedClusterPhase, health.Phase) + assert.NotEmpty(t, health.Message) + assert.Equal(t, 1, writes) + assert.Equal(t, health, written) + }) + } +} + func TestPoolerModelConvergeSetsConnectionPoolerStatus(t *testing.T) { t.Parallel() @@ -1878,7 +2135,7 @@ func TestPoolerModelConvergeSetsConnectionPoolerStatus(t *testing.T) { false, ) - require.NoError(t, model.Actuate(context.Background())) + model.Actuate(context.Background()) health, err := model.Converge(context.Background()) require.NoError(t, err) assert.Nil(t, cluster.Status.ConnectionPoolerStatus) diff --git a/pkg/postgresql/cluster/core/events.go b/pkg/postgresql/cluster/core/events.go index 551ce9147..1472a3922 100644 --- a/pkg/postgresql/cluster/core/events.go +++ b/pkg/postgresql/cluster/core/events.go @@ -12,6 +12,7 @@ import ( const ( EventSecretReady = "SecretReady" EventConfigMapReady = "ConfigMapReady" + EventConfigMapReconciled = "ConfigMapReconciled" EventClusterAdopted = "ClusterAdopted" EventClusterCreationStarted = "ClusterCreationStarted" EventClusterUpdateStarted = "ClusterUpdateStarted" diff --git a/pkg/postgresql/cluster/core/types.go b/pkg/postgresql/cluster/core/types.go index b0f0ed0d7..79e29ce63 100644 --- a/pkg/postgresql/cluster/core/types.go +++ b/pkg/postgresql/cluster/core/types.go @@ -95,6 +95,9 @@ const ( // condition reasons — cluster/provisioner 
reasonClusterClassNotFound conditionReasons = "ClusterClassNotFound" reasonInvalidConfiguration conditionReasons = "InvalidConfiguration" + reasonClusterBuildFailed conditionReasons = "ClusterBuildFailed" + reasonClusterGetFailed conditionReasons = "ClusterGetFailed" + reasonClusterPatchFailed conditionReasons = "ClusterPatchFailed" // condition reasons — managedRolesReady reasonManagedRolesReady conditionReasons = "ManagedRolesReconciled"