diff --git a/internal/controller/postgrescluster/pgbackrest.go b/internal/controller/postgrescluster/pgbackrest.go index e0290c7fc..58ce50cd6 100644 --- a/internal/controller/postgrescluster/pgbackrest.go +++ b/internal/controller/postgrescluster/pgbackrest.go @@ -8,6 +8,8 @@ import ( "context" "fmt" "io" + "math/rand" + "os" "reflect" "regexp" "sort" @@ -24,6 +26,7 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" @@ -595,6 +598,47 @@ func (r *Reconciler) generateRepoHostIntent(ctx context.Context, postgresCluster naming.LabelData: naming.DataPGBackRest, }) + podAnnotations := naming.Merge(annotations) + + // Preserve existing pod template annotations from the current StatefulSet. + // This ensures annotations like pgbackrest-secret-version persist across reconciliations. + for _, host := range repoResources.hosts { + if host.Name == repoHostName { + if host.Spec.Template.Annotations != nil { + podAnnotations = naming.Merge(podAnnotations, host.Spec.Template.Annotations) + } + break + } + } + + // Tracks pgbackrest secret version in order to trigger repo-host updates upon change. + // Fixes a problem where repo-host certificates become stale. + existingSecret := &corev1.Secret{} + secretKey := client.ObjectKey{ + Name: naming.PGBackRestSecret(postgresCluster).Name, + Namespace: postgresCluster.GetNamespace(), + } + + if podAnnotations == nil { + podAnnotations = make(map[string]string) + } + + log := logging.FromContext(ctx) + if shouldAnnotateRepoHost(ctx, podAnnotations) { + if err := r.Client.Get(ctx, secretKey, existingSecret); err == nil { + podAnnotations["postgres-operator.crunchydata.com/pgbackrest-secret-version"] = existingSecret.ResourceVersion + log.Info("Added pgbackrest-secret-version annotation to repo-host", + "repoHost", repoHostName, + "resourceVersion", existingSecret.ResourceVersion) + + } else { + log.Info("Failed to fetch pgbackrest secret, skipping annotation", + "repoHost", repoHostName, + "secret", secretKey.Name, + "error", err) + } + } + repo := &appsv1.StatefulSet{ TypeMeta: metav1.TypeMeta{ APIVersion: appsv1.SchemeGroupVersion.String(), @@ -614,7 +658,7 @@ func (r *Reconciler) generateRepoHostIntent(ctx context.Context, postgresCluster Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Labels: labels, - Annotations: annotations, + Annotations: podAnnotations, }, }, }, @@ -748,6 +792,50 @@ func (r *Reconciler) generateRepoHostIntent(ctx context.Context, postgresCluster return repo, nil } +// In order to avoid multiple repo-hosts restarting per cycle, we adopt a gradual rollout strategy. +// Distribution is (pseudo-)random, but we should see ~20 restarts/per cycle. +// When all repo-hosts are annotated, this function can be removed. +func shouldAnnotateRepoHost(ctx context.Context, annotations labels.Set) bool { + log := logging.FromContext(ctx) + + if _, exists := annotations["postgres-operator.crunchydata.com/pgbackrest-secret-version"]; exists { + log.Info("Repo-host already has pgbackrest-secret-version annotation, keeping it") + return true + } + + // 2. Otherwise, given the start time of the rollout, we calculate a linear increasing threshold and + // roll a d100. If the value of the dice is lower than the threshold, we add the annotation in this + // reconciliation cycle. Note that this means a machine restart. + // By the end of a week, the threshold should reach 100 and any dice value will allow for the + // annotation to be added, effectively annotating all remaining pods. + if rolloutStartStr := os.Getenv("PGBACKREST_SECRET_ROLLOUT_START_TIME"); rolloutStartStr != "" { + if rolloutStart, err := time.Parse(time.RFC3339, rolloutStartStr); err == nil { + oneWeekInMinutes := 7 * 24 * 60 + minutesElapsed := int(time.Since(rolloutStart).Minutes()) + + threshold := min((minutesElapsed*100)/oneWeekInMinutes, 100) + d100 := rand.Intn(100) + + if d100 <= threshold { + log.Info("Rollout dice passed, will add pgbackrest-secret-version annotation", + "threshold", threshold, "dice", d100, "minutesElapsed", minutesElapsed) + return true + } + + log.Info("Rollout dice failed, skipping pgbackrest-secret-version annotation", + "threshold", threshold, "dice", d100, "minutesElapsed", minutesElapsed) + return false + } else { + log.Info("Failed to parse PGBACKREST_SECRET_ROLLOUT_START_TIME, skipping annotation", + "value", rolloutStartStr, "error", err) + return false + } + } + + log.Info("PGBACKREST_SECRET_ROLLOUT_START_TIME not set, skipping annotation") + return false +} + func (r *Reconciler) generateRepoVolumeIntent(postgresCluster *v1beta1.PostgresCluster, spec corev1.PersistentVolumeClaimSpec, repoName string, repoResources *RepoResources) (*corev1.PersistentVolumeClaim, error) {