From d504e263dff7704a16de0cffb5845837b1b480ce Mon Sep 17 00:00:00 2001 From: Tyler Hillery Date: Tue, 18 Nov 2025 12:05:12 -0600 Subject: [PATCH 1/9] fix: make storage migrations idempotent --- .github/workflows/ci.yml | 11 +++++++++ .../00010-search-files-search-function.sql | 1 - migrations/tenant/0002-storage-schema.sql | 24 ++++++++++++------- .../0006-change-column-name-in-get-size.sql | 1 - .../tenant/0009-fix-search-function.sql | 1 - .../0038-iceberg-catalog-flag-on-buckets.sql | 5 +++- .../0039-add-search-v2-sort-support.sql | 1 - 7 files changed, 31 insertions(+), 13 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 831bbee75..04b4fe013 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -95,6 +95,17 @@ jobs: with: github-token: ${{ secrets.GITHUB_TOKEN }} + - name: Verify migration idempotency + run: | + psql "${{ env.DATABASE_URL }}" -c "DROP TABLE IF EXISTS storage.migrations;" + npm run migration:run + env: + DATABASE_URL: postgresql://postgres:postgres@127.0.0.1/postgres + DB_INSTALL_ROLES: true + ENABLE_DEFAULT_METRICS: false + PG_QUEUE_ENABLE: false + MULTI_TENANT: false + - name: Ensure OrioleDB migration compatibility run: | npm run infra:restart:oriole diff --git a/migrations/tenant/00010-search-files-search-function.sql b/migrations/tenant/00010-search-files-search-function.sql index ea61e67f5..fe5808469 100644 --- a/migrations/tenant/00010-search-files-search-function.sql +++ b/migrations/tenant/00010-search-files-search-function.sql @@ -1,4 +1,3 @@ -drop function storage.search; create or replace function storage.search ( prefix text, diff --git a/migrations/tenant/0002-storage-schema.sql b/migrations/tenant/0002-storage-schema.sql index d91586fea..59f0978d1 100644 --- a/migrations/tenant/0002-storage-schema.sql +++ b/migrations/tenant/0002-storage-schema.sql @@ -18,11 +18,23 @@ BEGIN END IF; -- Install ROLES - EXECUTE 'CREATE ROLE ' || anon_role || ' NOLOGIN NOINHERIT'; - EXECUTE 'CREATE ROLE ' || authenticated_role || ' NOLOGIN NOINHERIT'; - EXECUTE 'CREATE ROLE ' || service_role || ' NOLOGIN NOINHERIT bypassrls'; + IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = anon_role) THEN + EXECUTE 'CREATE ROLE ' || anon_role || ' NOLOGIN NOINHERIT'; + END IF; + + IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = authenticated_role ) THEN + EXECUTE 'CREATE ROLE ' || authenticated_role || ' NOLOGIN NOINHERIT'; + END IF; + + IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = service_role) THEN + EXECUTE 'CREATE ROLE ' || service_role || ' NOLOGIN NOINHERIT bypassrls'; + END IF; + + IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'authenticator') THEN + EXECUTE 'CREATE USER authenticator NOINHERIT'; + END IF; + - create user authenticator noinherit; EXECUTE 'grant ' || anon_role || ' to authenticator'; EXECUTE 'grant ' || authenticated_role || ' to authenticator'; EXECUTE 'grant ' || service_role || ' to authenticator'; @@ -70,7 +82,6 @@ CREATE INDEX IF NOT EXISTS name_prefix_search ON storage.objects(name text_patte ALTER TABLE storage.objects ENABLE ROW LEVEL SECURITY; -drop function if exists storage.foldername; CREATE OR REPLACE FUNCTION storage.foldername(name text) RETURNS text[] LANGUAGE plpgsql @@ -83,7 +94,6 @@ BEGIN END $function$; -drop function if exists storage.filename; CREATE OR REPLACE FUNCTION storage.filename(name text) RETURNS text LANGUAGE plpgsql @@ -96,7 +106,6 @@ BEGIN END $function$; -drop function if exists storage.extension; CREATE OR REPLACE FUNCTION storage.extension(name text) RETURNS text LANGUAGE plpgsql @@ -113,7 +122,6 @@ END $function$; -- @todo can this query be optimised further? -drop function if exists storage.search; CREATE OR REPLACE FUNCTION storage.search(prefix text, bucketname text, limits int DEFAULT 100, levels int DEFAULT 1, offsets int DEFAULT 0) RETURNS TABLE ( name text, diff --git a/migrations/tenant/0006-change-column-name-in-get-size.sql b/migrations/tenant/0006-change-column-name-in-get-size.sql index f459cc1bc..b4e04e656 100644 --- a/migrations/tenant/0006-change-column-name-in-get-size.sql +++ b/migrations/tenant/0006-change-column-name-in-get-size.sql @@ -1,4 +1,3 @@ -DROP FUNCTION storage.get_size_by_bucket(); CREATE OR REPLACE FUNCTION storage.get_size_by_bucket() RETURNS TABLE ( size BIGINT, diff --git a/migrations/tenant/0009-fix-search-function.sql b/migrations/tenant/0009-fix-search-function.sql index 3d6fadc1d..f68c91718 100644 --- a/migrations/tenant/0009-fix-search-function.sql +++ b/migrations/tenant/0009-fix-search-function.sql @@ -1,4 +1,3 @@ -drop function if exists storage.search; CREATE OR REPLACE FUNCTION storage.search(prefix text, bucketname text, limits int DEFAULT 100, levels int DEFAULT 1, offsets int DEFAULT 0) RETURNS TABLE ( name text, diff --git a/migrations/tenant/0038-iceberg-catalog-flag-on-buckets.sql b/migrations/tenant/0038-iceberg-catalog-flag-on-buckets.sql index c21598aea..072319575 100644 --- a/migrations/tenant/0038-iceberg-catalog-flag-on-buckets.sql +++ b/migrations/tenant/0038-iceberg-catalog-flag-on-buckets.sql @@ -40,7 +40,9 @@ DO $$ updated_at timestamptz NOT NULL default now() ); - CREATE UNIQUE INDEX IF NOT EXISTS idx_iceberg_namespaces_bucket_id ON storage.iceberg_namespaces (bucket_id, name); + IF EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'iceberg_namespaces' AND column_name = 'bucket_id') THEN + CREATE UNIQUE INDEX IF NOT EXISTS idx_iceberg_namespaces_bucket_id ON storage.iceberg_namespaces (bucket_id, name); + END IF; CREATE TABLE IF NOT EXISTS storage.iceberg_tables ( id uuid primary key default gen_random_uuid(), @@ -52,6 +54,7 @@ DO $$ updated_at timestamptz NOT NULL default now() ); + DROP INDEX IF EXISTS idx_iceberg_tables_namespace_id; CREATE UNIQUE INDEX idx_iceberg_tables_namespace_id ON storage.iceberg_tables (namespace_id, name); ALTER TABLE storage.iceberg_namespaces ENABLE ROW LEVEL SECURITY; diff --git a/migrations/tenant/0039-add-search-v2-sort-support.sql b/migrations/tenant/0039-add-search-v2-sort-support.sql index 84168ed59..a325c0d18 100644 --- a/migrations/tenant/0039-add-search-v2-sort-support.sql +++ b/migrations/tenant/0039-add-search-v2-sort-support.sql @@ -1,4 +1,3 @@ -DROP FUNCTION IF EXISTS storage.search_v2; CREATE OR REPLACE FUNCTION storage.search_v2 ( prefix text, bucket_name text, From a8bb4209ac6c98da58cf8d9c08abfcad402d9b0e Mon Sep 17 00:00:00 2001 From: Tyler Hillery Date: Tue, 18 Nov 2025 12:51:45 -0600 Subject: [PATCH 2/9] fix: add PGRST_JWT_SECRET to github action --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 04b4fe013..c8609b36a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -100,6 +100,7 @@ jobs: psql "${{ env.DATABASE_URL }}" -c "DROP TABLE IF EXISTS storage.migrations;" npm run migration:run env: + PGRST_JWT_SECRET: ${{ secrets.PGRST_JWT_SECRET }} DATABASE_URL: postgresql://postgres:postgres@127.0.0.1/postgres DB_INSTALL_ROLES: true ENABLE_DEFAULT_METRICS: false From 76e05803c7e26048fdb283dbbe5cbd7fea7f9eb7 Mon Sep 17 00:00:00 2001 From: Tyler Hillery Date: Tue, 18 Nov 2025 13:51:44 -0600 Subject: [PATCH 3/9] fix: add drop function if exists cascade to search_v2 --- migrations/tenant/0027-search-v2.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migrations/tenant/0027-search-v2.sql b/migrations/tenant/0027-search-v2.sql index a0521bdd1..7eff4aba4 100644 --- a/migrations/tenant/0027-search-v2.sql +++ b/migrations/tenant/0027-search-v2.sql @@ -1,4 +1,4 @@ - +DROP FUNCTION IF EXISTS storage.search_v2 CASCADE; CREATE OR REPLACE FUNCTION storage.search_v2 ( prefix text, bucket_name text, From b1d37332a04d2f83028bf6862b1164ee51a2dc9d Mon Sep 17 00:00:00 2001 From: Tyler Hillery Date: Tue, 18 Nov 2025 16:25:38 -0600 Subject: [PATCH 4/9] fix: iceberg catalog id fkey creation on if column already exists --- .../tenant/0048-iceberg-catalog-ids.sql | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/migrations/tenant/0048-iceberg-catalog-ids.sql b/migrations/tenant/0048-iceberg-catalog-ids.sql index a6490776d..86510c1b2 100644 --- a/migrations/tenant/0048-iceberg-catalog-ids.sql +++ b/migrations/tenant/0048-iceberg-catalog-ids.sql @@ -54,8 +54,29 @@ DO $$ ALTER TABLE storage.iceberg_tables RENAME COLUMN bucket_id to bucket_name; END IF; - ALTER TABLE storage.iceberg_namespaces ADD COLUMN IF NOT EXISTS catalog_id uuid NULL REFERENCES storage.buckets_analytics(id) ON DELETE CASCADE; - ALTER TABLE storage.iceberg_tables ADD COLUMN IF NOT EXISTS catalog_id uuid NULL REFERENCES storage.buckets_analytics(id) ON DELETE CASCADE; + ALTER TABLE storage.iceberg_namespaces ADD COLUMN IF NOT EXISTS catalog_id uuid NULL; + ALTER TABLE storage.iceberg_tables ADD COLUMN IF NOT EXISTS catalog_id uuid NULL; + + -- Add FK constraints if they don't exist (handles idempotency when column already exists) + IF NOT EXISTS ( + SELECT 1 FROM information_schema.table_constraints + WHERE table_schema = 'storage' + AND table_name = 'iceberg_namespaces' + AND constraint_name = 'iceberg_namespaces_catalog_id_fkey' + ) THEN + ALTER TABLE storage.iceberg_namespaces ADD CONSTRAINT iceberg_namespaces_catalog_id_fkey + FOREIGN KEY (catalog_id) REFERENCES storage.buckets_analytics(id) ON DELETE CASCADE; + END IF; + + IF NOT EXISTS ( + SELECT 1 FROM information_schema.table_constraints + WHERE table_schema = 'storage' + AND table_name = 'iceberg_tables' + AND constraint_name = 'iceberg_tables_catalog_id_fkey' + ) THEN + ALTER TABLE storage.iceberg_tables ADD CONSTRAINT iceberg_tables_catalog_id_fkey + FOREIGN KEY (catalog_id) REFERENCES storage.buckets_analytics(id) ON DELETE CASCADE; + END IF; CREATE UNIQUE INDEX IF NOT EXISTS idx_iceberg_namespaces_bucket_id ON storage.iceberg_namespaces (catalog_id, name); CREATE UNIQUE INDEX IF NOT EXISTS idx_iceberg_tables_namespace_id ON storage.iceberg_tables (catalog_id, namespace_id, name); From 5d766db694f8f6e445acef5e0230e7052098706a Mon Sep 17 00:00:00 2001 From: Tyler Hillery Date: Tue, 18 Nov 2025 16:50:16 -0600 Subject: [PATCH 5/9] fix: drop functions --- migrations/tenant/0006-change-column-name-in-get-size.sql | 1 + migrations/tenant/0027-search-v2.sql | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/migrations/tenant/0006-change-column-name-in-get-size.sql b/migrations/tenant/0006-change-column-name-in-get-size.sql index b4e04e656..78727d10f 100644 --- a/migrations/tenant/0006-change-column-name-in-get-size.sql +++ b/migrations/tenant/0006-change-column-name-in-get-size.sql @@ -1,3 +1,4 @@ +DROP FUNCTION IF EXISTS storage.get_size_by_bucket; CREATE OR REPLACE FUNCTION storage.get_size_by_bucket() RETURNS TABLE ( size BIGINT, diff --git a/migrations/tenant/0027-search-v2.sql b/migrations/tenant/0027-search-v2.sql index 7eff4aba4..65659814d 100644 --- a/migrations/tenant/0027-search-v2.sql +++ b/migrations/tenant/0027-search-v2.sql @@ -1,4 +1,3 @@ -DROP FUNCTION IF EXISTS storage.search_v2 CASCADE; CREATE OR REPLACE FUNCTION storage.search_v2 ( prefix text, bucket_name text, From 637ddefbcb65c070619aff275a774aca6ad5d377 Mon Sep 17 00:00:00 2001 From: Tyler Hillery Date: Tue, 18 Nov 2025 16:56:29 -0600 Subject: [PATCH 6/9] feat: add ci check that pg dump is the same before and after running migrations --- .github/workflows/ci.yml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c8609b36a..9d81afda2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -97,8 +97,23 @@ jobs: - name: Verify migration idempotency run: | - psql "${{ env.DATABASE_URL }}" -c "DROP TABLE IF EXISTS storage.migrations;" + pg_dump "$DATABASE_URL" \ + --exclude-table-data=storage.migrations \ + | grep -v '^\\restrict' \ + | grep -v '^\\unrestrict' \ + > before.sql + + psql "$DATABASE_URL" -c "DROP TABLE IF EXISTS storage.migrations;" npm run migration:run + + pg_dump "$DATABASE_URL" \ + --exclude-table-data=storage.migrations \ + | grep -v '^\\restrict' \ + | grep -v '^\\unrestrict' \ + > after.sql + + diff before.sql after.sql || (echo 'Schema mismatch!'; exit 1) + env: PGRST_JWT_SECRET: ${{ secrets.PGRST_JWT_SECRET }} DATABASE_URL: postgresql://postgres:postgres@127.0.0.1/postgres From 60c244ba1586da6f8bd124869893f9e4b4b45a19 Mon Sep 17 00:00:00 2001 From: Tyler Hillery Date: Tue, 18 Nov 2025 17:06:50 -0600 Subject: [PATCH 7/9] refactor: add restric key to pg dump --- .github/workflows/ci.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9d81afda2..69f69eae6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -99,8 +99,7 @@ jobs: run: | pg_dump "$DATABASE_URL" \ --exclude-table-data=storage.migrations \ - | grep -v '^\\restrict' \ - | grep -v '^\\unrestrict' \ + --restrict-key=test \ > before.sql psql "$DATABASE_URL" -c "DROP TABLE IF EXISTS storage.migrations;" @@ -108,8 +107,7 @@ jobs: pg_dump "$DATABASE_URL" \ --exclude-table-data=storage.migrations \ - | grep -v '^\\restrict' \ - | grep -v '^\\unrestrict' \ + --restrict-key=test \ > after.sql diff before.sql after.sql || (echo 'Schema mismatch!'; exit 1) From 7ff0b6db844b2bcbb12b3ec348351928e4d5e5fc Mon Sep 17 00:00:00 2001 From: Tyler Hillery Date: Tue, 18 Nov 2025 17:18:30 -0600 Subject: [PATCH 8/9] fix: minor formatting changes --- migrations/tenant/0006-change-column-name-in-get-size.sql | 2 +- migrations/tenant/0027-search-v2.sql | 1 + migrations/tenant/0048-iceberg-catalog-ids.sql | 1 - 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/migrations/tenant/0006-change-column-name-in-get-size.sql b/migrations/tenant/0006-change-column-name-in-get-size.sql index 78727d10f..a4f4836f3 100644 --- a/migrations/tenant/0006-change-column-name-in-get-size.sql +++ b/migrations/tenant/0006-change-column-name-in-get-size.sql @@ -1,4 +1,4 @@ -DROP FUNCTION IF EXISTS storage.get_size_by_bucket; +DROP FUNCTION IF EXISTS storage.get_size_by_bucket(); CREATE OR REPLACE FUNCTION storage.get_size_by_bucket() RETURNS TABLE ( size BIGINT, diff --git a/migrations/tenant/0027-search-v2.sql b/migrations/tenant/0027-search-v2.sql index 65659814d..a0521bdd1 100644 --- a/migrations/tenant/0027-search-v2.sql +++ b/migrations/tenant/0027-search-v2.sql @@ -1,3 +1,4 @@ + CREATE OR REPLACE FUNCTION storage.search_v2 ( prefix text, bucket_name text, diff --git a/migrations/tenant/0048-iceberg-catalog-ids.sql b/migrations/tenant/0048-iceberg-catalog-ids.sql index 86510c1b2..188b4a961 100644 --- a/migrations/tenant/0048-iceberg-catalog-ids.sql +++ b/migrations/tenant/0048-iceberg-catalog-ids.sql @@ -57,7 +57,6 @@ DO $$ ALTER TABLE storage.iceberg_namespaces ADD COLUMN IF NOT EXISTS catalog_id uuid NULL; ALTER TABLE storage.iceberg_tables ADD COLUMN IF NOT EXISTS catalog_id uuid NULL; - -- Add FK constraints if they don't exist (handles idempotency when column already exists) IF NOT EXISTS ( SELECT 1 FROM information_schema.table_constraints WHERE table_schema = 'storage' From 18197a5c6bb5bdad6280bb28330ff92c25a4fc6d Mon Sep 17 00:00:00 2001 From: Tyler Hillery Date: Tue, 18 Nov 2025 19:26:32 -0600 Subject: [PATCH 9/9] refactor: change from drop table to truncate --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 69f69eae6..59fe7e3e1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -102,7 +102,7 @@ jobs: --restrict-key=test \ > before.sql - psql "$DATABASE_URL" -c "DROP TABLE IF EXISTS storage.migrations;" + psql "$DATABASE_URL" -c "TRUNCATE TABLE storage.migrations;" npm run migration:run pg_dump "$DATABASE_URL" \