From ef38739d93bc1539e94e620de6cb2f5b3b986328 Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Mon, 13 Jan 2025 10:28:36 -0600 Subject: [PATCH 1/3] Wait only for local write of controller SQL When Postgres replication is broken and synchronous commit is enabled, the controller blocks waiting for a remote write that will never happen. This change allows the controller to return from SQL writes and repair replication. Issue: PGO-1592 --- internal/controller/postgrescluster/pgmonitor_test.go | 2 +- internal/pgaudit/postgres.go | 2 ++ internal/pgbouncer/postgres.go | 10 +++++++++- internal/pgbouncer/postgres_test.go | 4 +++- internal/pgmonitor/postgres.go | 8 ++++++++ internal/postgis/postgis.go | 4 ++++ internal/postgis/postgis_test.go | 1 + internal/postgres/users.go | 8 ++++++++ internal/postgres/users_test.go | 2 +- 9 files changed, 37 insertions(+), 4 deletions(-) diff --git a/internal/controller/postgrescluster/pgmonitor_test.go b/internal/controller/postgrescluster/pgmonitor_test.go index 5c13e22586..36a5027aaa 100644 --- a/internal/controller/postgrescluster/pgmonitor_test.go +++ b/internal/controller/postgrescluster/pgmonitor_test.go @@ -602,7 +602,7 @@ func TestReconcilePGMonitorExporterStatus(t *testing.T) { podExecCalled: false, // Status was generated manually for this test case // TODO (jmckulk): add code to generate status - status: v1beta1.MonitoringStatus{ExporterConfiguration: "6d874c58df"}, + status: v1beta1.MonitoringStatus{ExporterConfiguration: "5c5f955485"}, statusChangedAfterReconcile: false, }} { t.Run(test.name, func(t *testing.T) { diff --git a/internal/pgaudit/postgres.go b/internal/pgaudit/postgres.go index c926168a44..27a0ffd720 100644 --- a/internal/pgaudit/postgres.go +++ b/internal/pgaudit/postgres.go @@ -35,7 +35,9 @@ func EnableInPostgreSQL(ctx context.Context, exec postgres.Executor) error { stdout, stderr, err := exec.ExecInAllDatabases(ctx, // Quiet the NOTICE from IF EXISTS, and install the pgAudit event triggers. + // Use the default setting for "synchronous_commit". // - https://www.postgresql.org/docs/current/runtime-config-client.html + // - https://www.postgresql.org/docs/current/runtime-config-wal.html // - https://github.com/pgaudit/pgaudit#settings `SET client_min_messages = WARNING; CREATE EXTENSION IF NOT EXISTS pgaudit;`, map[string]string{ diff --git a/internal/pgbouncer/postgres.go b/internal/pgbouncer/postgres.go index 4d91bfda6c..d9a9d91539 100644 --- a/internal/pgbouncer/postgres.go +++ b/internal/pgbouncer/postgres.go @@ -68,6 +68,10 @@ func DisableInPostgreSQL(ctx context.Context, exec postgres.Executor) error { // - https://www.postgresql.org/docs/current/runtime-config-client.html `SET client_min_messages = WARNING;`, + // Do not wait for changes to be replicated. [Since PostgreSQL v9.1] + // - https://www.postgresql.org/docs/current/runtime-config-wal.html + `SET synchronous_commit = LOCAL;`, + // Drop the following objects in a transaction. `BEGIN;`, @@ -102,7 +106,7 @@ SELECT pg_catalog.format('DROP OWNED BY %I CASCADE', :'username') // Remove the PgBouncer user now that the objects and other privileges are gone. stdout, stderr, err = exec.ExecInDatabasesFromQuery(ctx, `SELECT pg_catalog.current_database()`, - `SET client_min_messages = WARNING; DROP ROLE IF EXISTS :"username";`, + `SET client_min_messages = WARNING; SET synchronous_commit = LOCAL; DROP ROLE IF EXISTS :"username";`, map[string]string{ "username": postgresqlUser, @@ -130,6 +134,10 @@ func EnableInPostgreSQL( // - https://www.postgresql.org/docs/current/runtime-config-client.html `SET client_min_messages = WARNING;`, + // Do not wait for changes to be replicated. [Since PostgreSQL v9.1] + // - https://www.postgresql.org/docs/current/runtime-config-wal.html + `SET synchronous_commit = LOCAL;`, + // Create the following objects in a transaction so that permissions // are correct before any other session sees them. // - https://www.postgresql.org/docs/current/ddl-priv.html diff --git a/internal/pgbouncer/postgres_test.go b/internal/pgbouncer/postgres_test.go index 7587fe3dbb..eb3bb65818 100644 --- a/internal/pgbouncer/postgres_test.go +++ b/internal/pgbouncer/postgres_test.go @@ -49,6 +49,7 @@ func TestDisableInPostgreSQL(t *testing.T) { assert.NilError(t, err) assert.Equal(t, string(b), strings.TrimSpace(` SET client_min_messages = WARNING; +SET synchronous_commit = LOCAL; BEGIN; DROP FUNCTION IF EXISTS :"namespace".get_auth(username TEXT); DROP SCHEMA IF EXISTS :"namespace" CASCADE; @@ -90,7 +91,7 @@ COMMIT;`)) b, err := io.ReadAll(stdin) assert.NilError(t, err) - assert.Equal(t, string(b), `SET client_min_messages = WARNING; DROP ROLE IF EXISTS :"username";`) + assert.Equal(t, string(b), `SET client_min_messages = WARNING; SET synchronous_commit = LOCAL; DROP ROLE IF EXISTS :"username";`) gomega.NewWithT(t).Expect(command).To(gomega.ContainElements( `--set=username=_crunchypgbouncer`, ), "expected query parameters") @@ -135,6 +136,7 @@ func TestEnableInPostgreSQL(t *testing.T) { assert.NilError(t, err) assert.Equal(t, string(b), strings.TrimSpace(` SET client_min_messages = WARNING; +SET synchronous_commit = LOCAL; BEGIN; SELECT pg_catalog.format('CREATE ROLE %I NOLOGIN', :'username') WHERE NOT EXISTS (SELECT 1 FROM pg_catalog.pg_roles WHERE rolname = :'username') diff --git a/internal/pgmonitor/postgres.go b/internal/pgmonitor/postgres.go index a9249e7ed7..292d116e30 100644 --- a/internal/pgmonitor/postgres.go +++ b/internal/pgmonitor/postgres.go @@ -79,6 +79,10 @@ func EnableExporterInPostgreSQL(ctx context.Context, exec postgres.Executor, // - https://www.postgresql.org/docs/current/runtime-config-client.html `SET client_min_messages = WARNING;`, + // Do not wait for changes to be replicated. [Since PostgreSQL v9.1] + // - https://www.postgresql.org/docs/current/runtime-config-wal.html + `SET synchronous_commit = LOCAL;`, + // Exporter expects that extension(s) to be installed in all databases // pg_stat_statements: https://access.crunchydata.com/documentation/pgmonitor/latest/exporter/ "CREATE EXTENSION IF NOT EXISTS pg_stat_statements;", @@ -103,6 +107,10 @@ func EnableExporterInPostgreSQL(ctx context.Context, exec postgres.Executor, // - https://www.postgresql.org/docs/current/runtime-config-client.html `SET client_min_messages = WARNING;`, + // Do not wait for changes to be replicated. [Since PostgreSQL v9.1] + // - https://www.postgresql.org/docs/current/runtime-config-wal.html + `SET synchronous_commit = LOCAL;`, + // Setup.sql file from the exporter image. sql is specific // to the PostgreSQL version setup, diff --git a/internal/postgis/postgis.go b/internal/postgis/postgis.go index a0287c0c23..5a90c7afe2 100644 --- a/internal/postgis/postgis.go +++ b/internal/postgis/postgis.go @@ -26,6 +26,10 @@ func EnableInPostgreSQL(ctx context.Context, exec postgres.Executor) error { // - https://www.postgresql.org/docs/current/runtime-config-client.html `SET client_min_messages = WARNING;`, + // Do not wait for changes to be replicated. [Since PostgreSQL v9.1] + // - https://www.postgresql.org/docs/current/runtime-config-wal.html + `SET synchronous_commit = LOCAL;`, + `CREATE EXTENSION IF NOT EXISTS postgis;`, `CREATE EXTENSION IF NOT EXISTS postgis_topology;`, `CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;`, diff --git a/internal/postgis/postgis_test.go b/internal/postgis/postgis_test.go index 80aa808b03..7e83c840e9 100644 --- a/internal/postgis/postgis_test.go +++ b/internal/postgis/postgis_test.go @@ -29,6 +29,7 @@ func TestEnableInPostgreSQL(t *testing.T) { b, err := io.ReadAll(stdin) assert.NilError(t, err) assert.Equal(t, string(b), `SET client_min_messages = WARNING; +SET synchronous_commit = LOCAL; CREATE EXTENSION IF NOT EXISTS postgis; CREATE EXTENSION IF NOT EXISTS postgis_topology; CREATE EXTENSION IF NOT EXISTS fuzzystrmatch; diff --git a/internal/postgres/users.go b/internal/postgres/users.go index 720aafd238..b16be66152 100644 --- a/internal/postgres/users.go +++ b/internal/postgres/users.go @@ -69,6 +69,10 @@ func WriteUsersInPostgreSQL( var err error var sql bytes.Buffer + // Do not wait for changes to be replicated. [Since PostgreSQL v9.1] + // - https://www.postgresql.org/docs/current/runtime-config-wal.html + _, _ = sql.WriteString(`SET synchronous_commit = LOCAL;`) + // Prevent unexpected dereferences by emptying "search_path". The "pg_catalog" // schema is still searched, and only temporary objects can be created. // - https://www.postgresql.org/docs/current/runtime-config-client.html#GUC-SEARCH-PATH @@ -219,6 +223,10 @@ func WriteUsersSchemasInPostgreSQL(ctx context.Context, exec Executor, // - https://www.postgresql.org/docs/current/runtime-config-client.html `SET client_min_messages = WARNING;`, + // Do not wait for changes to be replicated. [Since PostgreSQL v9.1] + // - https://www.postgresql.org/docs/current/runtime-config-wal.html + `SET synchronous_commit = LOCAL;`, + // Creates a schema named after and owned by the user // - https://www.postgresql.org/docs/current/ddl-schemas.html // - https://www.postgresql.org/docs/current/sql-createschema.html diff --git a/internal/postgres/users_test.go b/internal/postgres/users_test.go index 63ac8c4823..57587a3b11 100644 --- a/internal/postgres/users_test.go +++ b/internal/postgres/users_test.go @@ -63,7 +63,7 @@ func TestWriteUsersInPostgreSQL(t *testing.T) { b, err := io.ReadAll(stdin) assert.NilError(t, err) assert.Equal(t, string(b), strings.TrimSpace(` -SET search_path TO ''; +SET synchronous_commit = LOCAL;SET search_path TO ''; CREATE TEMPORARY TABLE input (id serial, data json); \copy input (data) from stdin with (format text) \. From 92f035e6a88a968be0a1f70cb20503e832e00104 Mon Sep 17 00:00:00 2001 From: Chris Bandy Date: Tue, 14 Jan 2025 16:27:50 +0000 Subject: [PATCH 2/3] Submit govulncheck results to GitHub Code Scanning The SARIF results from govulncheck should be compatible with GitHub since v1.1.4. See: https://github.com/golang/vuln/releases/tag/v1.1.4 --- .github/workflows/govulncheck.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/govulncheck.yaml b/.github/workflows/govulncheck.yaml index 022a97e892..df81b90e53 100644 --- a/.github/workflows/govulncheck.yaml +++ b/.github/workflows/govulncheck.yaml @@ -38,8 +38,6 @@ jobs: uses: github/codeql-action/upload-sarif@v3 with: sarif_file: 'govulncheck-results.sarif' - # TODO: https://go.dev/issue/70157 - if: ${{ false }} # Print any detected vulnerabilities to the workflow log. This step fails # when the tool detects a vulnerability in code that is called. From d0a80f04c7902d749819e889e35951198e8e2926 Mon Sep 17 00:00:00 2001 From: TJ Moore Date: Tue, 14 Jan 2025 17:46:41 -0500 Subject: [PATCH 3/3] Reconcile a Service Account for the pgBackRest Repo Host Currently, the pgBackRest repo host uses the 'default' service account. However, EKS's IAM role integration requires a specific annotation to enable this feature. This change adds a new SA for the repo host to allow PGO to reconcile a SA with this annotation, thus allowing the IAM integration to work as expected. fixes #4006 Issue: PGO-2123 --- .../controller/postgrescluster/pgbackrest.go | 49 ++++++++++++++++--- .../postgrescluster/pgbackrest_test.go | 46 +++++++++++++++-- internal/naming/names.go | 9 ++++ 3 files changed, 94 insertions(+), 10 deletions(-) diff --git a/internal/controller/postgrescluster/pgbackrest.go b/internal/controller/postgrescluster/pgbackrest.go index d0f2232472..ae68864598 100644 --- a/internal/controller/postgrescluster/pgbackrest.go +++ b/internal/controller/postgrescluster/pgbackrest.go @@ -122,9 +122,9 @@ type RepoResources struct { // strategy. func (r *Reconciler) applyRepoHostIntent(ctx context.Context, postgresCluster *v1beta1.PostgresCluster, repoHostName string, repoResources *RepoResources, - observedInstances *observedInstances) (*appsv1.StatefulSet, error) { + observedInstances *observedInstances, saName string) (*appsv1.StatefulSet, error) { - repo, err := r.generateRepoHostIntent(ctx, postgresCluster, repoHostName, repoResources, observedInstances) + repo, err := r.generateRepoHostIntent(ctx, postgresCluster, repoHostName, repoResources, observedInstances, saName) if err != nil { return nil, err } @@ -567,7 +567,7 @@ func (r *Reconciler) setScheduledJobStatus(ctx context.Context, // as needed to create and reconcile a pgBackRest dedicated repository host within the kubernetes // cluster. func (r *Reconciler) generateRepoHostIntent(ctx context.Context, postgresCluster *v1beta1.PostgresCluster, - repoHostName string, repoResources *RepoResources, observedInstances *observedInstances, + repoHostName string, repoResources *RepoResources, observedInstances *observedInstances, saName string, ) (*appsv1.StatefulSet, error) { annotations := naming.Merge( @@ -681,6 +681,8 @@ func (r *Reconciler) generateRepoHostIntent(ctx context.Context, postgresCluster repo.Spec.Template.Spec.SecurityContext = postgres.PodSecurityContext(postgresCluster) + repo.Spec.Template.Spec.ServiceAccountName = saName + pgbackrest.AddServerToRepoPod(ctx, postgresCluster, &repo.Spec.Template.Spec) if pgbackrest.RepoHostVolumeDefined(postgresCluster) { @@ -1380,10 +1382,18 @@ func (r *Reconciler) reconcilePGBackRest(ctx context.Context, return result, nil } + // reconcile the RBAC required to run the pgBackRest Repo Host + repoHostSA, err := r.reconcileRepoHostRBAC(ctx, postgresCluster) + if err != nil { + log.Error(err, "unable to reconcile pgBackRest repo host RBAC") + result.Requeue = true + return result, nil + } + var repoHost *appsv1.StatefulSet var repoHostName string // reconcile the pgbackrest repository host - repoHost, err = r.reconcileDedicatedRepoHost(ctx, postgresCluster, repoResources, instances) + repoHost, err = r.reconcileDedicatedRepoHost(ctx, postgresCluster, repoResources, instances, repoHostSA.GetName()) if err != nil { log.Error(err, "unable to reconcile pgBackRest repo host") result.Requeue = true @@ -2118,12 +2128,39 @@ func (r *Reconciler) reconcilePGBackRestRBAC(ctx context.Context, return sa, nil } +// +kubebuilder:rbac:groups="",resources="serviceaccounts",verbs={create,patch} + +// reconcileRepoHostRBAC reconciles the ServiceAccount for the pgBackRest repo host +func (r *Reconciler) reconcileRepoHostRBAC(ctx context.Context, + postgresCluster *v1beta1.PostgresCluster) (*corev1.ServiceAccount, error) { + + sa := &corev1.ServiceAccount{ObjectMeta: naming.RepoHostRBAC(postgresCluster)} + sa.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ServiceAccount")) + + if err := r.setControllerReference(postgresCluster, sa); err != nil { + return nil, errors.WithStack(err) + } + + sa.Annotations = naming.Merge(postgresCluster.Spec.Metadata.GetAnnotationsOrNil(), + postgresCluster.Spec.Backups.PGBackRest.Metadata.GetAnnotationsOrNil()) + sa.Labels = naming.Merge(postgresCluster.Spec.Metadata.GetLabelsOrNil(), + postgresCluster.Spec.Backups.PGBackRest.Metadata.GetLabelsOrNil(), + naming.PGBackRestLabels(postgresCluster.GetName())) + + if err := r.apply(ctx, sa); err != nil { + return nil, errors.WithStack(err) + } + + return sa, nil +} + // reconcileDedicatedRepoHost is responsible for reconciling a pgBackRest dedicated repository host // StatefulSet according to a specific PostgresCluster custom resource. func (r *Reconciler) reconcileDedicatedRepoHost(ctx context.Context, postgresCluster *v1beta1.PostgresCluster, repoResources *RepoResources, - observedInstances *observedInstances) (*appsv1.StatefulSet, error) { + observedInstances *observedInstances, + saName string) (*appsv1.StatefulSet, error) { log := logging.FromContext(ctx).WithValues("reconcileResource", "repoHost") @@ -2164,7 +2201,7 @@ func (r *Reconciler) reconcileDedicatedRepoHost(ctx context.Context, } repoHostName := repoResources.hosts[0].Name repoHost, err := r.applyRepoHostIntent(ctx, postgresCluster, repoHostName, repoResources, - observedInstances) + observedInstances, saName) if err != nil { log.Error(err, "reconciling repository host") return nil, err diff --git a/internal/controller/postgrescluster/pgbackrest_test.go b/internal/controller/postgrescluster/pgbackrest_test.go index 5b024af643..b3934d0fd1 100644 --- a/internal/controller/postgrescluster/pgbackrest_test.go +++ b/internal/controller/postgrescluster/pgbackrest_test.go @@ -328,6 +328,8 @@ schedulerName: default-scheduler securityContext: fsGroup: 26 fsGroupChangePolicy: OnRootMismatch +serviceAccount: hippocluster-repohost +serviceAccountName: hippocluster-repohost shareProcessNamespace: true terminationGracePeriodSeconds: 30 tolerations: @@ -724,6 +726,42 @@ func TestReconcilePGBackRestRBAC(t *testing.T) { assert.Assert(t, foundSubject) } +func TestReconcileRepoHostRBAC(t *testing.T) { + // Garbage collector cleans up test resources before the test completes + if strings.EqualFold(os.Getenv("USE_EXISTING_CLUSTER"), "true") { + t.Skip("USE_EXISTING_CLUSTER: Test fails due to garbage collection") + } + + ctx := context.Background() + _, tClient := setupKubernetes(t) + require.ParallelCapacity(t, 0) + + r := &Reconciler{Client: tClient, Owner: client.FieldOwner(t.Name())} + + clusterName := "hippocluster" + clusterUID := "hippouid" + + ns := setupNamespace(t, tClient) + + // create a PostgresCluster to test with + postgresCluster := fakePostgresCluster(clusterName, ns.GetName(), clusterUID, true) + postgresCluster.Status.PGBackRest = &v1beta1.PGBackRestStatus{ + Repos: []v1beta1.RepoStatus{{Name: "repo1", StanzaCreated: false}}, + } + + serviceAccount, err := r.reconcileRepoHostRBAC(ctx, postgresCluster) + assert.NilError(t, err) + assert.Assert(t, serviceAccount != nil) + + // verify the service account has been created + sa := &corev1.ServiceAccount{} + err = tClient.Get(ctx, types.NamespacedName{ + Name: naming.RepoHostRBAC(postgresCluster).Name, + Namespace: postgresCluster.GetNamespace(), + }, sa) + assert.NilError(t, err) +} + func TestReconcileStanzaCreate(t *testing.T) { cfg, tClient := setupKubernetes(t) require.ParallelCapacity(t, 0) @@ -2672,12 +2710,12 @@ func TestGenerateRepoHostIntent(t *testing.T) { t.Run("empty", func(t *testing.T) { _, err := r.generateRepoHostIntent(ctx, &v1beta1.PostgresCluster{}, "", &RepoResources{}, - &observedInstances{}) + &observedInstances{}, "") assert.NilError(t, err) }) cluster := &v1beta1.PostgresCluster{} - sts, err := r.generateRepoHostIntent(ctx, cluster, "", &RepoResources{}, &observedInstances{}) + sts, err := r.generateRepoHostIntent(ctx, cluster, "", &RepoResources{}, &observedInstances{}, "") assert.NilError(t, err) t.Run("ServiceAccount", func(t *testing.T) { @@ -2698,7 +2736,7 @@ func TestGenerateRepoHostIntent(t *testing.T) { }, } observed := &observedInstances{forCluster: []*Instance{{Pods: []*corev1.Pod{{}}}}} - sts, err := r.generateRepoHostIntent(ctx, cluster, "", &RepoResources{}, observed) + sts, err := r.generateRepoHostIntent(ctx, cluster, "", &RepoResources{}, observed, "") assert.NilError(t, err) assert.Equal(t, *sts.Spec.Replicas, int32(1)) }) @@ -2710,7 +2748,7 @@ func TestGenerateRepoHostIntent(t *testing.T) { }, } observed := &observedInstances{forCluster: []*Instance{{}}} - sts, err := r.generateRepoHostIntent(ctx, cluster, "", &RepoResources{}, observed) + sts, err := r.generateRepoHostIntent(ctx, cluster, "", &RepoResources{}, observed, "") assert.NilError(t, err) assert.Equal(t, *sts.Spec.Replicas, int32(0)) }) diff --git a/internal/naming/names.go b/internal/naming/names.go index b07c5b1a59..fc310d837f 100644 --- a/internal/naming/names.go +++ b/internal/naming/names.go @@ -490,6 +490,15 @@ func PGBackRestRBAC(cluster *v1beta1.PostgresCluster) metav1.ObjectMeta { } } +// RepoHostRBAC returns the ObjectMeta necessary to lookup the ServiceAccount for +// the pgBackRest Repo Host +func RepoHostRBAC(cluster *v1beta1.PostgresCluster) metav1.ObjectMeta { + return metav1.ObjectMeta{ + Namespace: cluster.Namespace, + Name: cluster.Name + "-repohost", + } +} + // PGBackRestRepoVolume returns the ObjectMeta for a pgBackRest repository volume func PGBackRestRepoVolume(cluster *v1beta1.PostgresCluster, repoName string) metav1.ObjectMeta {