From 72e235911adedc7e73e3874a636ca73f6b2da461 Mon Sep 17 00:00:00 2001 From: Juliana Oliveira Date: Wed, 22 Oct 2025 16:39:31 -0300 Subject: [PATCH 1/4] fix(backups): reloads repo-host when secrets are updated Signed-off-by: Juliana Oliveira --- .../controller/postgrescluster/pgbackrest.go | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/internal/controller/postgrescluster/pgbackrest.go b/internal/controller/postgrescluster/pgbackrest.go index e0290c7fc3..d6b3492060 100644 --- a/internal/controller/postgrescluster/pgbackrest.go +++ b/internal/controller/postgrescluster/pgbackrest.go @@ -595,6 +595,22 @@ func (r *Reconciler) generateRepoHostIntent(ctx context.Context, postgresCluster naming.LabelData: naming.DataPGBackRest, }) + podAnnotations := naming.Merge(annotations) + // Tracks pgbackrest secret version in order to trigger repo-host updates upon change. + // Fixes a problem where repo-host certificates become stale. + existingSecret := &corev1.Secret{} + secretKey := client.ObjectKey{ + Name: naming.PGBackRestSecret(postgresCluster).Name, + Namespace: postgresCluster.GetNamespace(), + } + + if err := r.Client.Get(ctx, secretKey, existingSecret); err == nil { + if podAnnotations == nil { + podAnnotations = make(map[string]string) + } + podAnnotations["postgres-operator.crunchydata.com/pgbackrest-secret-version"] = existingSecret.ResourceVersion + } + repo := &appsv1.StatefulSet{ TypeMeta: metav1.TypeMeta{ APIVersion: appsv1.SchemeGroupVersion.String(), @@ -614,7 +630,7 @@ func (r *Reconciler) generateRepoHostIntent(ctx context.Context, postgresCluster Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Labels: labels, - Annotations: annotations, + Annotations: podAnnotations, }, }, }, From 8d36a3372e3d7e630d0211b703508e33414bb73e Mon Sep 17 00:00:00 2001 From: Juliana Oliveira Date: Thu, 23 Oct 2025 10:16:36 -0300 Subject: [PATCH 2/4] fix: adds linear rollout strategy Signed-off-by: Juliana Oliveira --- 
.../controller/postgrescluster/pgbackrest.go | 44 +++++++++++++++++-- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/internal/controller/postgrescluster/pgbackrest.go b/internal/controller/postgrescluster/pgbackrest.go index d6b3492060..456dc42403 100644 --- a/internal/controller/postgrescluster/pgbackrest.go +++ b/internal/controller/postgrescluster/pgbackrest.go @@ -8,6 +8,8 @@ import ( "context" "fmt" "io" + "math/rand" + "os" "reflect" "regexp" "sort" @@ -24,6 +26,7 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" @@ -604,11 +607,14 @@ func (r *Reconciler) generateRepoHostIntent(ctx context.Context, postgresCluster Namespace: postgresCluster.GetNamespace(), } - if err := r.Client.Get(ctx, secretKey, existingSecret); err == nil { - if podAnnotations == nil { - podAnnotations = make(map[string]string) + if podAnnotations == nil { + podAnnotations = make(map[string]string) + } + + if shouldAnnotateRepoHost(podAnnotations) { + if err := r.Client.Get(ctx, secretKey, existingSecret); err == nil { + podAnnotations["postgres-operator.crunchydata.com/pgbackrest-secret-version"] = existingSecret.ResourceVersion } - podAnnotations["postgres-operator.crunchydata.com/pgbackrest-secret-version"] = existingSecret.ResourceVersion } repo := &appsv1.StatefulSet{ @@ -764,6 +770,36 @@ func (r *Reconciler) generateRepoHostIntent(ctx context.Context, postgresCluster return repo, nil } +// In order to avoid multiple repo-hosts restarting per cycle, we adopt a gradual rollout strategy. +// Distribution is (pseudo-)random, but we should see ~20 restarts/per cycle. +// When all repo-hosts are annotated, this function can be removed. 
+func shouldAnnotateRepoHost(annotations labels.Set) bool { + if _, exists := annotations["postgres-operator.crunchydata.com/pgbackrest-secret-version"]; exists { + // 1. If the annotation already exist, we keep it. + return true + } + + // 2. Otherwise, given the start time of the rollout, we calculate a linearly increasing threshold and + // roll a d100. If the value of the dice is lower than or equal to the threshold, we add the annotation in this + // reconciliation cycle. Note that this means a machine restart. + // By the end of a week, the threshold should reach 100 and any dice value will allow for the + // annotation to be added, effectively annotating all remaining pods. + if rolloutStartStr := os.Getenv("PGBACKREST_SECRET_ROLLOUT_START_TIME"); rolloutStartStr != "" { + if rolloutStart, err := time.Parse(time.RFC3339, rolloutStartStr); err == nil { + oneWeekInMinutes := 7 * 24 * 60 + minutesElapsed := int(time.Since(rolloutStart).Minutes()) + + // Increases every minute. Reconciliation cycles happen every 10 minutes. 
+ threshold := min((minutesElapsed*100)/oneWeekInMinutes, 100) + d100 := rand.Intn(100) + + return d100 <= threshold + } + } + + return false +} + func (r *Reconciler) generateRepoVolumeIntent(postgresCluster *v1beta1.PostgresCluster, spec corev1.PersistentVolumeClaimSpec, repoName string, repoResources *RepoResources) (*corev1.PersistentVolumeClaim, error) { From 362bfec9dcff09ed17b804cd5e36dfe77f526703 Mon Sep 17 00:00:00 2001 From: Juliana Oliveira Date: Thu, 23 Oct 2025 14:58:42 -0300 Subject: [PATCH 3/4] fix: adds logs Signed-off-by: Juliana Oliveira --- .../controller/postgrescluster/pgbackrest.go | 34 ++++++++++++++++--- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/internal/controller/postgrescluster/pgbackrest.go b/internal/controller/postgrescluster/pgbackrest.go index 456dc42403..fca50e97b2 100644 --- a/internal/controller/postgrescluster/pgbackrest.go +++ b/internal/controller/postgrescluster/pgbackrest.go @@ -611,9 +611,19 @@ func (r *Reconciler) generateRepoHostIntent(ctx context.Context, postgresCluster podAnnotations = make(map[string]string) } - if shouldAnnotateRepoHost(podAnnotations) { + log := logging.FromContext(ctx) + if shouldAnnotateRepoHost(ctx, podAnnotations) { if err := r.Client.Get(ctx, secretKey, existingSecret); err == nil { podAnnotations["postgres-operator.crunchydata.com/pgbackrest-secret-version"] = existingSecret.ResourceVersion + log.Info("Added pgbackrest-secret-version annotation to repo-host", + "repoHost", repoHostName, + "resourceVersion", existingSecret.ResourceVersion) + + } else { + log.Info("Failed to fetch pgbackrest secret, skipping annotation", + "repoHost", repoHostName, + "secret", secretKey.Name, + "error", err) } } @@ -773,9 +783,11 @@ func (r *Reconciler) generateRepoHostIntent(ctx context.Context, postgresCluster // In order to avoid multiple repo-hosts restarting per cycle, we adopt a gradual rollout strategy. // Distribution is (pseudo-)random, but we should see ~20 restarts/per cycle. 
// When all repo-hosts are annotated, this function can be removed. -func shouldAnnotateRepoHost(annotations labels.Set) bool { +func shouldAnnotateRepoHost(ctx context.Context, annotations labels.Set) bool { + log := logging.FromContext(ctx) + if _, exists := annotations["postgres-operator.crunchydata.com/pgbackrest-secret-version"]; exists { - // 1. If the annotation already exist, we keep it. + log.Info("Repo-host already has pgbackrest-secret-version annotation, keeping it") return true } @@ -789,14 +801,26 @@ func shouldAnnotateRepoHost(annotations labels.Set) bool { oneWeekInMinutes := 7 * 24 * 60 minutesElapsed := int(time.Since(rolloutStart).Minutes()) - // Increases every minute. Reconciliation cycles happen every 10 minutes. threshold := min((minutesElapsed*100)/oneWeekInMinutes, 100) d100 := rand.Intn(100) - return d100 <= threshold + if d100 <= threshold { + log.Info("Rollout dice passed, will add pgbackrest-secret-version annotation", + "threshold", threshold, "dice", d100, "minutesElapsed", minutesElapsed) + return true + } + + log.Info("Rollout dice failed, skipping pgbackrest-secret-version annotation", + "threshold", threshold, "dice", d100, "minutesElapsed", minutesElapsed) + return false + } else { + log.Info("Failed to parse PGBACKREST_SECRET_ROLLOUT_START_TIME, skipping annotation", + "value", rolloutStartStr, "error", err) + return false } } + log.Info("PGBACKREST_SECRET_ROLLOUT_START_TIME not set, skipping annotation") return false } From 560f3726c740bfb1e444107fdc8b70fbe09a9134 Mon Sep 17 00:00:00 2001 From: Juliana Oliveira Date: Thu, 23 Oct 2025 17:54:39 -0300 Subject: [PATCH 4/4] fix: preserves statefulset annotations Signed-off-by: Juliana Oliveira --- internal/controller/postgrescluster/pgbackrest.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/internal/controller/postgrescluster/pgbackrest.go b/internal/controller/postgrescluster/pgbackrest.go index fca50e97b2..58ce50cd6c 100644 --- 
a/internal/controller/postgrescluster/pgbackrest.go +++ b/internal/controller/postgrescluster/pgbackrest.go @@ -599,6 +599,18 @@ func (r *Reconciler) generateRepoHostIntent(ctx context.Context, postgresCluster }) podAnnotations := naming.Merge(annotations) + + // Preserve existing pod template annotations from the current StatefulSet. + // This ensures annotations like pgbackrest-secret-version persist across reconciliations. + for _, host := range repoResources.hosts { + if host.Name == repoHostName { + if host.Spec.Template.Annotations != nil { + podAnnotations = naming.Merge(podAnnotations, host.Spec.Template.Annotations) + } + break + } + } + // Tracks pgbackrest secret version in order to trigger repo-host updates upon change. // Fixes a problem where repo-host certificates become stale. existingSecret := &corev1.Secret{}