From 282b0e62623824d645ccd4e1a1ab356f6792b25e Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 21 Oct 2025 12:42:50 +0200 Subject: [PATCH 01/26] feat(core): Add partitioned table support --- docs/how-to/configure-postgres.md | 22 ++ etl-api/src/db/publications.rs | 3 + etl-postgres/src/tokio/test_utils.rs | 2 +- etl/src/replication/client.rs | 120 ++++++-- etl/src/test_utils/test_schema.rs | 71 +++++ etl/src/types/mod.rs | 2 +- etl/tests/pipeline_with_partitioned_table.rs | 303 +++++++++++++++++++ etl/tests/replication.rs | 43 ++- 8 files changed, 544 insertions(+), 22 deletions(-) create mode 100644 etl/tests/pipeline_with_partitioned_table.rs diff --git a/docs/how-to/configure-postgres.md b/docs/how-to/configure-postgres.md index fe57cd07b..bf99db0a4 100644 --- a/docs/how-to/configure-postgres.md +++ b/docs/how-to/configure-postgres.md @@ -116,6 +116,28 @@ CREATE PUBLICATION all_tables FOR ALL TABLES; CREATE PUBLICATION inserts_only FOR TABLE users WITH (publish = 'insert'); ``` +#### Partitioned Tables + +If you want to replicate partitioned tables, you must use `publish_via_partition_root = true` when creating your publication. This option tells Postgres to treat the [partitioned table as a single table](https://www.postgresql.org/docs/current/sql-createpublication.html#SQL-CREATEPUBLICATION-PARAMS-WITH-PUBLISH-VIA-PARTITION-ROOT) from the replication perspective, rather than replicating each partition individually. All changes to any partition will be published as changes to the parent table: + +```sql +-- Create publication with partitioned table support +CREATE PUBLICATION my_publication FOR TABLE users, orders WITH (publish_via_partition_root = true); + +-- For all tables including partitioned tables +CREATE PUBLICATION all_tables FOR ALL TABLES WITH (publish_via_partition_root = true); +``` + +**Limitation:** If this option is enabled, `TRUNCATE` operations performed directly on individual partitions are not replicated. To replicate a truncate operation, you must execute it on the parent table instead: + +```sql +-- This will NOT be replicated +TRUNCATE TABLE orders_2024_q1; + +-- This WILL be replicated +TRUNCATE TABLE orders; +``` + ### Managing Publications ```sql diff --git a/etl-api/src/db/publications.rs b/etl-api/src/db/publications.rs index 38a4ab575..5c01ac859 100644 --- a/etl-api/src/db/publications.rs +++ b/etl-api/src/db/publications.rs @@ -43,6 +43,9 @@ pub async fn create_publication( } } + // Ensure partitioned tables publish via ancestor/root schema for logical replication + query.push_str(" with (publish_via_partition_root = true)"); + pool.execute(query.as_str()).await?; Ok(()) } diff --git a/etl-postgres/src/tokio/test_utils.rs b/etl-postgres/src/tokio/test_utils.rs index 6692e486b..7aad2b62b 100644 --- a/etl-postgres/src/tokio/test_utils.rs +++ b/etl-postgres/src/tokio/test_utils.rs @@ -61,7 +61,7 @@ impl PgDatabase { .collect::>(); let create_publication_query = format!( - "create publication {} for table {}", + "create publication {} for table {} with (publish_via_partition_root = true)", publication_name, table_names.join(", ") ); diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index 2746f6c47..16f72a296 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -407,28 +407,66 @@ impl PgReplicationClient { } /// Retrieves the OIDs of all tables included in a publication. + /// + /// For partitioned tables with `publish_via_partition_root=true`, this returns only the parent + /// table OID. 
The query uses a recursive CTE to walk up the partition inheritance hierarchy + /// and identify root tables that have no parent themselves. pub async fn get_publication_table_ids( &self, publication_name: &str, ) -> EtlResult> { - let publication_query = format!( - "select c.oid from pg_publication_tables pt - join pg_class c on c.relname = pt.tablename - join pg_namespace n on n.oid = c.relnamespace AND n.nspname = pt.schemaname - where pt.pubname = {};", - quote_literal(publication_name) + let query = format!( + r#" + with recursive has_rel as ( + -- Check if publication uses pg_publication_rel (explicit table list) + select exists( + select 1 + from pg_publication_rel r + join pg_publication p on p.oid = r.prpubid + where p.pubname = {pub} + ) as has + ), + pub_tables as ( + -- If publication has explicit relations, use pg_publication_rel + select r.prrelid as oid + from pg_publication_rel r + join pg_publication p on p.oid = r.prpubid + where p.pubname = {pub} and (select has from has_rel) + union all + -- Otherwise, use pg_publication_tables (for ALL TABLES publications) + select c.oid + from pg_publication_tables pt + join pg_class c on c.relname = pt.tablename + where pt.pubname = {pub} and not (select has from has_rel) + ), + recurse(relid) as ( + -- Start with all published tables + select oid from pub_tables + union all + -- Recursively walk up to find parent tables in inheritance hierarchy + select i.inhparent + from pg_inherits i + join recurse r on r.relid = i.inhrelid + ) + -- Return only root tables (those without a parent) + select distinct relid as oid + from recurse r + where not exists ( + select 1 from pg_inherits i where i.inhrelid = r.relid + ); + "#, + pub = quote_literal(publication_name) ); - let mut table_ids = vec![]; - for msg in self.client.simple_query(&publication_query).await? { + let mut roots = vec![]; + for msg in self.client.simple_query(&query).await? { if let SimpleQueryMessage::Row(row) = msg { - // For the sake of simplicity, we refer to the table oid as table id. let table_id = Self::get_row_value::(&row, "oid", "pg_class").await?; - table_ids.push(table_id); + roots.push(table_id); } } - Ok(table_ids) + Ok(roots) } /// Starts a logical replication stream from the specified publication and slot. 
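+    ///
+    /// With `publish_via_partition_root = true`, changes to any child partition are decoded
+    /// under the root partitioned table's identity, so the stream only ever carries the
+    /// parent's relation OID (documented Postgres behavior, not something this client enforces).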
@@ -662,7 +700,8 @@ impl PgReplicationClient { when 0 then true else (a.attnum in (select * from pub_attrs)) end - )", + )" + .to_string(), ) } else { // Postgres 14 or earlier or unknown, fallback to no column-level filtering @@ -677,20 +716,65 @@ impl PgReplicationClient { )", publication = quote_literal(publication), ), - "and (select count(*) from pub_table) > 0", + format!( + "and ((select count(*) from pub_table) > 0 or exists( + -- Also allow if parent table is in publication (for partitioned tables) + select 1 from pg_inherits i + join pg_publication_rel r on r.prrelid = i.inhparent + join pg_publication p on p.oid = r.prpubid + where i.inhrelid = {table_id} and p.pubname = {publication} + ))", + publication = quote_literal(publication), + ), ) } } else { - ("".into(), "") + ("".to_string(), "".to_string()) + }; + + let has_pub_cte = !pub_cte.is_empty(); + + let cte_prefix = if has_pub_cte { + // If there's already a pub_cte WITH clause, add our CTEs to it with a comma + format!("{pub_cte},") + } else { + // If no pub_cte, start our own WITH clause (no need for RECURSIVE) + "with ".to_string() }; let column_info_query = format!( - "{pub_cte} + "{cte_prefix} + -- Find direct parent of current table (if it's a partition) + direct_parent as ( + select i.inhparent as parent_oid + from pg_inherits i + where i.inhrelid = {table_id}::oid + limit 1 + ), + -- Get parent table's primary key columns + parent_pk_cols as ( + select array_agg(a.attname order by x.n) as pk_column_names + from pg_constraint con + join unnest(con.conkey) with ordinality as x(attnum, n) on true + join pg_attribute a on a.attrelid = con.conrelid and a.attnum = x.attnum + join direct_parent dp on con.conrelid = dp.parent_oid + where con.contype = 'p' + group by con.conname + ) select a.attname, a.atttypid, a.atttypmod, a.attnotnull, - coalesce(i.indisprimary, false) as primary + case + -- Direct primary key on this relation + when coalesce(i.indisprimary, false) = true then true + -- Inherit primary key from parent partitioned table if column name matches + when exists ( + select 1 from parent_pk_cols pk + where a.attname = any(pk.pk_column_names) + ) then true + else false + end as primary from pg_attribute a left join pg_index i on a.attrelid = i.indrelid @@ -807,9 +891,9 @@ impl PgReplicationClient { ) } else { format!( - r#"copy {} ({}) to stdout with (format text);"#, - table_name.as_quoted_identifier(), + r#"copy (select {} from {}) to stdout with (format text);"#, column_list, + table_name.as_quoted_identifier(), ) }; diff --git a/etl/src/test_utils/test_schema.rs b/etl/src/test_utils/test_schema.rs index 8faa6449b..2e4047fc5 100644 --- a/etl/src/test_utils/test_schema.rs +++ b/etl/src/test_utils/test_schema.rs @@ -127,6 +127,77 @@ pub async fn setup_test_database_schema( } } +/// Creates a partitioned table with the given name and partitions. +/// +/// This function creates: +/// 1. A parent partitioned table with a primary key +/// 2. Several child partitions based on the provided partition specifications +/// +/// Returns the table ID of the parent table and a list of partition table IDs. 
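+///
+/// As a sketch (table, schema, and quoting assumed for illustration), a spec of
+/// `("p1", "from (1) to (100)")` for a table `public.events` expands to:
+///
+/// ```sql
+/// create table "public"."events" (
+///     id bigserial,
+///     data text NOT NULL,
+///     partition_key integer NOT NULL,
+///     primary key (id, partition_key)
+/// ) partition by range (partition_key);
+///
+/// create table "public"."events_p1" partition of "public"."events"
+///     for values from (1) to (100);
+/// ```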
+pub async fn create_partitioned_table( + database: &PgDatabase, + table_name: TableName, + partition_specs: &[(&str, &str)], // (partition_name, partition_constraint) +) -> Result<(TableId, Vec), tokio_postgres::Error> { + let create_parent_query = format!( + "create table {} ( + id bigserial, + data text NOT NULL, + partition_key integer NOT NULL, + primary key (id, partition_key) + ) partition by range (partition_key)", + table_name.as_quoted_identifier() + ); + + database.run_sql(&create_parent_query).await?; + + let parent_row = database + .client + .as_ref() + .unwrap() + .query_one( + "select c.oid from pg_class c join pg_namespace n on n.oid = c.relnamespace + where n.nspname = $1 and c.relname = $2", + &[&table_name.schema, &table_name.name], + ) + .await?; + + let parent_table_id: TableId = parent_row.get(0); + let mut partition_table_ids = Vec::new(); + + for (partition_name, partition_constraint) in partition_specs { + let partition_table_name = TableName::new( + table_name.schema.clone(), + format!("{}_{}", table_name.name, partition_name), + ); + + let create_partition_query = format!( + "create table {} partition of {} for values {}", + partition_table_name.as_quoted_identifier(), + table_name.as_quoted_identifier(), + partition_constraint + ); + + database.run_sql(&create_partition_query).await?; + + let partition_row = database + .client + .as_ref() + .unwrap() + .query_one( + "select c.oid from pg_class c join pg_namespace n on n.oid = c.relnamespace + where n.nspname = $1 and c.relname = $2", + &[&partition_table_name.schema, &partition_table_name.name], + ) + .await?; + + let partition_table_id: TableId = partition_row.get(0); + partition_table_ids.push(partition_table_id); + } + + Ok((parent_table_id, partition_table_ids)) +} + /// Inserts users data into the database for testing purposes. pub async fn insert_users_data( client: &mut PgDatabase, diff --git a/etl/src/types/mod.rs b/etl/src/types/mod.rs index 05103904c..867af9a23 100644 --- a/etl/src/types/mod.rs +++ b/etl/src/types/mod.rs @@ -13,7 +13,7 @@ pub use event::*; pub use pipeline::*; pub use table_row::*; -pub use crate::conversions::numeric::PgNumeric; +pub use crate::conversions::numeric::{PgNumeric, Sign}; // Re-exports. pub use etl_postgres::types::*; diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs new file mode 100644 index 000000000..a40e69b49 --- /dev/null +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -0,0 +1,303 @@ +#![cfg(feature = "test-utils")] + +use etl::destination::memory::MemoryDestination; +use etl::state::table::TableReplicationPhaseType; +use etl::test_utils::database::{spawn_source_database, test_table_name}; +use etl::test_utils::event::group_events_by_type_and_table_id; +use etl::test_utils::notify::NotifyingStore; +use etl::test_utils::pipeline::create_pipeline; +use etl::test_utils::test_destination_wrapper::TestDestinationWrapper; +use etl::test_utils::test_schema::create_partitioned_table; +use etl::types::EventType; +use etl::types::PipelineId; +use etl_telemetry::tracing::init_test_tracing; +use rand::random; + +/// Initial copy for a partitioned table (published via root) copies all existing rows. 
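+///
+/// The initial copy must use the `copy (select ...) to stdout` form rather than
+/// `copy <table> to stdout`: Postgres rejects a direct `COPY ... TO` on a partitioned
+/// parent, while the `select` form reads through every partition.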
+#[tokio::test(flavor = "multi_thread")] +async fn partitioned_table_copy_replicates_existing_data() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("partitioned_events"); + let partition_specs = [ + ("p1", "from (1) to (100)"), + ("p2", "from (100) to (200)"), + ("p3", "from (200) to (300)"), + ]; + + let (parent_table_id, _partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values + ('event1', 50), ('event2', 150), ('event3', 250)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + let publication_name = "test_partitioned_pub".to_string(); + database + .create_publication(&publication_name, std::slice::from_ref(&table_name)) + .await + .expect("Failed to create publication"); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + // Register notification for initial copy completion. + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name, + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + + parent_sync_done.notified().await; + + let _ = pipeline.shutdown_and_wait().await; + + let table_rows = destination.get_table_rows().await; + let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum(); + + assert_eq!( + total_rows, 3, + "Expected 3 rows synced (one per partition), but got {total_rows}" + ); + + let table_states = state_store.get_table_replication_states().await; + + assert!( + table_states.contains_key(&parent_table_id), + "Parent table should be tracked in state" + ); + assert_eq!( + table_states.len(), + 1, + "Only the parent table should be tracked in state" + ); + + let parent_table_rows = table_rows + .iter() + .filter(|(table_id, _)| **table_id == parent_table_id) + .map(|(_, rows)| rows.len()) + .sum::(); + assert_eq!( + parent_table_rows, 3, + "Parent table should contain all rows when publishing via root" + ); +} + +/// Initial copy completes and CDC streams new rows from newly added partitions. 
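+///
+/// No publication change is needed when a partition is added. A sketch of the DDL/DML this
+/// test runs (table and schema names assumed):
+///
+/// ```sql
+/// create table public.partitioned_events_late_p3
+///     partition of public.partitioned_events_late
+///     for values from (200) to (300);
+///
+/// -- This row lands in the new partition and streams under the parent's OID.
+/// insert into public.partitioned_events_late (data, partition_key) values ('event3', 250);
+/// ```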
+#[tokio::test(flavor = "multi_thread")] +async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("partitioned_events_late"); + let initial_partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; + + let (parent_table_id, _initial_partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &initial_partition_specs) + .await + .expect("Failed to create initial partitioned table"); + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + let publication_name = "test_partitioned_pub_late".to_string(); + database + .create_publication(&publication_name, std::slice::from_ref(&table_name)) + .await + .expect("Failed to create publication"); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + // Register notification for initial copy completion. + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name, + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + + parent_sync_done.notified().await; + + let new_partition_name = format!("{}_{}", table_name.name, "p3"); + let new_partition_qualified_name = format!("{}.{}", table_name.schema, new_partition_name); + database + .run_sql(&format!( + "create table {} partition of {} for values from (200) to (300)", + new_partition_qualified_name, + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('event3', 250)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Wait for CDC to deliver the new row. + let inserts_notify = destination + .wait_for_events_count(vec![(EventType::Insert, 1)]) + .await; + inserts_notify.notified().await; + + let _ = pipeline.shutdown_and_wait().await; + + let table_rows = destination.get_table_rows().await; + let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum(); + assert_eq!( + total_rows, 2, + "Expected 2 rows synced from initial copy, got {total_rows}" + ); + + let table_states = state_store.get_table_replication_states().await; + assert!(table_states.contains_key(&parent_table_id)); + assert_eq!(table_states.len(), 1); + + let parent_table_rows = table_rows + .iter() + .filter(|(table_id, _)| **table_id == parent_table_id) + .map(|(_, rows)| rows.len()) + .sum::(); + assert_eq!(parent_table_rows, 2); + + let events = destination.get_events().await; + let grouped = group_events_by_type_and_table_id(&events); + let parent_inserts = grouped + .get(&(EventType::Insert, parent_table_id)) + .cloned() + .unwrap_or_default(); + assert_eq!(parent_inserts.len(), 1); +} + +/// Dropping a child partition must not emit DELETE/TRUNCATE events. 
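+///
+/// A sketch of the DDL exercised here (table and schema names assumed):
+///
+/// ```sql
+/// alter table public.partitioned_events_drop
+///     detach partition public.partitioned_events_drop_p1;
+/// drop table public.partitioned_events_drop_p1;
+/// -- Both statements are DDL; logical decoding emits no DELETE or TRUNCATE for them.
+/// ```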
+#[tokio::test(flavor = "multi_thread")] +async fn partition_drop_does_not_emit_delete_or_truncate() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("partitioned_events_drop"); + let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; + + let (parent_table_id, _partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + let publication_name = "test_partitioned_pub_drop".to_string(); + database + .create_publication(&publication_name, std::slice::from_ref(&table_name)) + .await + .expect("Failed to create publication"); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name, + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + parent_sync_done.notified().await; + + let events_before = destination.get_events().await; + let grouped_before = group_events_by_type_and_table_id(&events_before); + let del_before = grouped_before + .get(&(EventType::Delete, parent_table_id)) + .map(|v| v.len()) + .unwrap_or(0); + let trunc_before = grouped_before + .get(&(EventType::Truncate, parent_table_id)) + .map(|v| v.len()) + .unwrap_or(0); + + // Detach and drop one child partition (DDL should not generate DML events) + let child_p1_name = format!("{}_{}", table_name.name, "p1"); + let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + database + .run_sql(&format!( + "alter table {} detach partition {}", + table_name.as_quoted_identifier(), + child_p1_qualified + )) + .await + .unwrap(); + database + .run_sql(&format!("drop table {child_p1_qualified}")) + .await + .unwrap(); + + let _ = pipeline.shutdown_and_wait().await; + + let events_after = destination.get_events().await; + let grouped_after = group_events_by_type_and_table_id(&events_after); + let del_after = grouped_after + .get(&(EventType::Delete, parent_table_id)) + .map(|v| v.len()) + .unwrap_or(0); + let trunc_after = grouped_after + .get(&(EventType::Truncate, parent_table_id)) + .map(|v| v.len()) + .unwrap_or(0); + + assert_eq!( + del_after, del_before, + "Partition drop must not emit DELETE events" + ); + assert_eq!( + trunc_after, trunc_before, + "Partition drop must not emit TRUNCATE events" + ); +} diff --git a/etl/tests/replication.rs b/etl/tests/replication.rs index a1d9dda1e..21e7ee1c7 100644 --- a/etl/tests/replication.rs +++ b/etl/tests/replication.rs @@ -1,10 +1,13 @@ #![cfg(feature = "test-utils")] +use std::collections::HashSet; + use etl::error::ErrorKind; use etl::replication::client::PgReplicationClient; use etl::test_utils::database::{spawn_source_database, test_table_name}; use etl::test_utils::pipeline::test_slot_name; use etl::test_utils::table::assert_table_schema; +use etl::test_utils::test_schema::create_partitioned_table; use etl_postgres::tokio::test_utils::{TableModification, id_column_schema}; use etl_postgres::types::ColumnSchema; use 
etl_telemetry::tracing::init_test_tracing; @@ -550,11 +553,47 @@ async fn test_publication_creation_and_check() { ); // We check the table ids of the tables in the publication. - let table_ids = parent_client + let table_ids: HashSet<_> = parent_client .get_publication_table_ids("my_publication") + .await + .unwrap() + .into_iter() + .collect(); + assert_eq!(table_ids, HashSet::from([table_1_id, table_2_id])); +} + +#[tokio::test(flavor = "multi_thread")] +async fn test_publication_table_ids_collapse_partitioned_root() { + init_test_tracing(); + let database = spawn_source_database().await; + + let client = PgReplicationClient::connect(database.config.clone()) .await .unwrap(); - assert_eq!(table_ids, vec![table_1_id, table_2_id]); + + // We create a partitioned parent with two child partitions. + let table_name = test_table_name("part_parent"); + let (parent_table_id, _children) = create_partitioned_table( + &database, + table_name.clone(), + &[("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")], + ) + .await + .unwrap(); + + let publication_name = "pub_part_root"; + database + .create_publication(publication_name, std::slice::from_ref(&table_name)) + .await + .unwrap(); + + let id = client + .get_publication_table_ids(publication_name) + .await + .unwrap(); + + // We expect to get only the parent table id. + assert_eq!(id, vec![parent_table_id]); } #[tokio::test(flavor = "multi_thread")] From 0a57cfede8d674f655fb0820313f448eafcb5d5e Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 21 Oct 2025 12:43:18 +0200 Subject: [PATCH 02/26] Improve --- etl/src/replication/client.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index 16f72a296..13c04b89f 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -701,7 +701,7 @@ impl PgReplicationClient { else (a.attnum in (select * from pub_attrs)) end )" - .to_string(), + .to_string(), ) } else { // Postgres 14 or earlier or unknown, fallback to no column-level filtering From cd0b607a2bc16592f9b556b780027f2457d164ed Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 21 Oct 2025 12:54:21 +0200 Subject: [PATCH 03/26] Improve --- etl/src/replication/client.rs | 64 ++++++++++++-------- etl/tests/pipeline_with_partitioned_table.rs | 3 - 2 files changed, 39 insertions(+), 28 deletions(-) diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index 13c04b89f..5d52600d5 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -417,42 +417,45 @@ impl PgReplicationClient { ) -> EtlResult> { let query = format!( r#" - with recursive has_rel as ( - -- Check if publication uses pg_publication_rel (explicit table list) - select exists( - select 1 - from pg_publication_rel r - join pg_publication p on p.oid = r.prpubid - where p.pubname = {pub} - ) as has - ), - pub_tables as ( - -- If publication has explicit relations, use pg_publication_rel + with recursive pub_tables as ( + -- Get explicit tables from publication (for regular publications) select r.prrelid as oid from pg_publication_rel r join pg_publication p on p.oid = r.prpubid - where p.pubname = {pub} and (select has from has_rel) + where p.pubname = {pub} + union all - -- Otherwise, use pg_publication_tables (for ALL TABLES publications) + + -- Get tables from pg_publication_tables (for ALL TABLES publications) + -- Only executes if pg_publication_rel is empty for this publication select c.oid from pg_publication_tables pt join 
pg_class c on c.relname = pt.tablename - where pt.pubname = {pub} and not (select has from has_rel) + join pg_namespace n on n.oid = c.relnamespace and n.nspname = pt.schemaname + where pt.pubname = {pub} + and not exists ( + select 1 + from pg_publication_rel r + join pg_publication p on p.oid = r.prpubid + where p.pubname = {pub} + ) ), - recurse(relid) as ( - -- Start with all published tables + hierarchy(relid) as ( + -- Start with published tables select oid from pub_tables - union all - -- Recursively walk up to find parent tables in inheritance hierarchy + + union + + -- Recursively find parent tables in inheritance hierarchy select i.inhparent from pg_inherits i - join recurse r on r.relid = i.inhrelid + join hierarchy h on h.relid = i.inhrelid ) -- Return only root tables (those without a parent) select distinct relid as oid - from recurse r + from hierarchy where not exists ( - select 1 from pg_inherits i where i.inhrelid = r.relid + select 1 from pg_inherits i where i.inhrelid = hierarchy.relid ); "#, pub = quote_literal(publication_name) @@ -687,19 +690,30 @@ impl PgReplicationClient { ( format!( "with pub_attrs as ( - select unnest(r.prattrs) + select unnest(r.prattrs) as attnum from pg_publication_rel r - left join pg_publication p on r.prpubid = p.oid + join pg_publication p on r.prpubid = p.oid where p.pubname = {publication} and r.prrelid = {table_id} + ), + -- For partitioned tables, also check if parent is in publication + pub_parent as ( + select 1 as exists_in_pub + from pg_inherits i + join pg_publication_rel r on r.prrelid = i.inhparent + join pg_publication p on p.oid = r.prpubid + where i.inhrelid = {table_id} + and p.pubname = {publication} )", publication = quote_literal(publication), ), "and ( + -- Include column if it's in pub_attrs or if parent table is in publication case (select count(*) from pub_attrs) when 0 then true - else (a.attnum in (select * from pub_attrs)) + else (a.attnum in (select attnum from pub_attrs)) end + or exists(select 1 from pub_parent) )" .to_string(), ) @@ -710,7 +724,7 @@ impl PgReplicationClient { "with pub_table as ( select 1 as exists_in_pub from pg_publication_rel r - left join pg_publication p on r.prpubid = p.oid + join pg_publication p on r.prpubid = p.oid where p.pubname = {publication} and r.prrelid = {table_id} )", diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index a40e69b49..303025987 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -13,7 +13,6 @@ use etl::types::PipelineId; use etl_telemetry::tracing::init_test_tracing; use rand::random; -/// Initial copy for a partitioned table (published via root) copies all existing rows. #[tokio::test(flavor = "multi_thread")] async fn partitioned_table_copy_replicates_existing_data() { init_test_tracing(); @@ -100,7 +99,6 @@ async fn partitioned_table_copy_replicates_existing_data() { ); } -/// Initial copy completes and CDC streams new rows from newly added partitions. #[tokio::test(flavor = "multi_thread")] async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { init_test_tracing(); @@ -204,7 +202,6 @@ async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { assert_eq!(parent_inserts.len(), 1); } -/// Dropping a child partition must not emit DELETE/TRUNCATE events. 
#[tokio::test(flavor = "multi_thread")] async fn partition_drop_does_not_emit_delete_or_truncate() { init_test_tracing(); From 52fd049b30735bc4774e94fa13c2a741883e99ee Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 21 Oct 2025 12:57:02 +0200 Subject: [PATCH 04/26] Improve --- etl/src/replication/client.rs | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index 5d52600d5..556c6dcb6 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -695,25 +695,14 @@ impl PgReplicationClient { join pg_publication p on r.prpubid = p.oid where p.pubname = {publication} and r.prrelid = {table_id} - ), - -- For partitioned tables, also check if parent is in publication - pub_parent as ( - select 1 as exists_in_pub - from pg_inherits i - join pg_publication_rel r on r.prrelid = i.inhparent - join pg_publication p on p.oid = r.prpubid - where i.inhrelid = {table_id} - and p.pubname = {publication} )", publication = quote_literal(publication), ), "and ( - -- Include column if it's in pub_attrs or if parent table is in publication case (select count(*) from pub_attrs) when 0 then true else (a.attnum in (select attnum from pub_attrs)) end - or exists(select 1 from pub_parent) )" .to_string(), ) From 8624c3a867904f65b1d01465f812e2d1a7fb1c28 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 21 Oct 2025 16:17:52 +0200 Subject: [PATCH 05/26] Improve --- etl/src/replication/client.rs | 164 ++++---- etl/tests/pipeline_with_partitioned_table.rs | 415 +++++++++++++++++++ 2 files changed, 503 insertions(+), 76 deletions(-) diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index 556c6dcb6..648a5686a 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -160,6 +160,14 @@ impl PgReplicationSlotTransaction { } } +/// Result of building publication filter SQL components. +struct PublicationFilter { + /// CTEs to include in the WITH clause (empty string if no publication filtering). + ctes: String, + /// Predicate to include in the WHERE clause (empty string if no publication filtering). + predicate: String, +} + /// A client for interacting with Postgres's logical replication features. /// /// This client provides methods for creating replication slots, managing transactions, @@ -674,6 +682,64 @@ impl PgReplicationClient { ); } + /// Builds SQL fragments for filtering columns based on publication settings. 
+ /// + /// Returns CTEs and predicates that filter columns according to: + /// - Postgres 15+: Column-level filtering using `prattrs` + /// - Postgres 14 and earlier: Table-level filtering only + /// - No publication: No filtering (empty strings) + fn build_publication_filter_sql( + &self, + table_id: TableId, + publication_name: Option<&str>, + ) -> PublicationFilter { + let Some(publication_name) = publication_name else { + return PublicationFilter { + ctes: String::new(), + predicate: String::new(), + }; + }; + + // Postgres 15+ supports column-level filtering via prattrs + if let Some(server_version) = self.server_version + && server_version.get() >= 150000 + { + return PublicationFilter { + ctes: format!( + "pub_attrs as ( + select unnest(r.prattrs) as attnum + from pg_publication_rel r + join pg_publication p on r.prpubid = p.oid + where p.pubname = {publication} + and r.prrelid = {table_id} + ),", + publication = quote_literal(publication_name), + ), + predicate: "and ( + case (select count(*) from pub_attrs) + when 0 then true + else (a.attnum in (select attnum from pub_attrs)) + end + )".to_string(), + }; + } + + // Postgres 14 and earlier: table-level filtering only + PublicationFilter { + ctes: format!( + "pub_table as ( + select 1 as exists_in_pub + from pg_publication_rel r + join pg_publication p on r.prpubid = p.oid + where p.pubname = {publication} + and r.prrelid = {table_id} + ),", + publication = quote_literal(publication_name), + ), + predicate: "and (select count(*) from pub_table) > 0".to_string(), + } + } + /// Retrieves schema information for all columns in a table. /// /// If a publication is specified, only columns included in that publication @@ -683,78 +749,21 @@ impl PgReplicationClient { table_id: TableId, publication: Option<&str>, ) -> EtlResult> { - let (pub_cte, pub_pred) = if let Some(publication) = publication { - if let Some(server_version) = self.server_version - && server_version.get() >= 150000 - { - ( - format!( - "with pub_attrs as ( - select unnest(r.prattrs) as attnum - from pg_publication_rel r - join pg_publication p on r.prpubid = p.oid - where p.pubname = {publication} - and r.prrelid = {table_id} - )", - publication = quote_literal(publication), - ), - "and ( - case (select count(*) from pub_attrs) - when 0 then true - else (a.attnum in (select attnum from pub_attrs)) - end - )" - .to_string(), - ) - } else { - // Postgres 14 or earlier or unknown, fallback to no column-level filtering - ( - format!( - "with pub_table as ( - select 1 as exists_in_pub - from pg_publication_rel r - join pg_publication p on r.prpubid = p.oid - where p.pubname = {publication} - and r.prrelid = {table_id} - )", - publication = quote_literal(publication), - ), - format!( - "and ((select count(*) from pub_table) > 0 or exists( - -- Also allow if parent table is in publication (for partitioned tables) - select 1 from pg_inherits i - join pg_publication_rel r on r.prrelid = i.inhparent - join pg_publication p on p.oid = r.prpubid - where i.inhrelid = {table_id} and p.pubname = {publication} - ))", - publication = quote_literal(publication), - ), - ) - } - } else { - ("".to_string(), "".to_string()) - }; - - let has_pub_cte = !pub_cte.is_empty(); - - let cte_prefix = if has_pub_cte { - // If there's already a pub_cte WITH clause, add our CTEs to it with a comma - format!("{pub_cte},") - } else { - // If no pub_cte, start our own WITH clause (no need for RECURSIVE) - "with ".to_string() - }; + // Build publication filter CTEs and predicates based on Postgres version. 
+ let publication_filter = + self.build_publication_filter_sql(table_id, publication); let column_info_query = format!( - "{cte_prefix} - -- Find direct parent of current table (if it's a partition) + r#" + with {publication_ctes} + -- Find the direct parent table (for child partitions) direct_parent as ( select i.inhparent as parent_oid from pg_inherits i - where i.inhrelid = {table_id}::oid + where i.inhrelid = {table_id} limit 1 ), - -- Get parent table's primary key columns + -- Extract primary key column names from the parent table parent_pk_cols as ( select array_agg(a.attname order by x.n) as pk_column_names from pg_constraint con @@ -764,16 +773,18 @@ impl PgReplicationClient { where con.contype = 'p' group by con.conname ) - select a.attname, + select + a.attname, a.atttypid, a.atttypmod, a.attnotnull, case - -- Direct primary key on this relation + -- Check if column has a direct primary key index when coalesce(i.indisprimary, false) = true then true - -- Inherit primary key from parent partitioned table if column name matches + -- Check if column name matches parent's primary key (for partitions) when exists ( - select 1 from parent_pk_cols pk + select 1 + from parent_pk_cols pk where a.attname = any(pk.pk_column_names) ) then true else false @@ -784,16 +795,17 @@ impl PgReplicationClient { and a.attnum = any(i.indkey) and i.indisprimary = true where a.attnum > 0::int2 - and not a.attisdropped - and a.attgenerated = '' - and a.attrelid = {table_id} - {pub_pred} + and not a.attisdropped + and a.attgenerated = '' + and a.attrelid = {table_id} + {publication_predicate} order by a.attnum - ", + "#, + publication_ctes = publication_filter.ctes, + publication_predicate = publication_filter.predicate, ); - + let mut column_schemas = vec![]; - for message in self.client.simple_query(&column_info_query).await? { if let SimpleQueryMessage::Row(row) = message { let name = Self::get_row_value::(&row, "attname", "pg_attribute").await?; diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 303025987..790f07e89 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -13,6 +13,7 @@ use etl::types::PipelineId; use etl_telemetry::tracing::init_test_tracing; use rand::random; +/// The initial copy for a partitioned table (published via root) copies all existing rows. #[tokio::test(flavor = "multi_thread")] async fn partitioned_table_copy_replicates_existing_data() { init_test_tracing(); @@ -99,6 +100,7 @@ async fn partitioned_table_copy_replicates_existing_data() { ); } +/// The initial copy completes and CDC streams new rows from newly added partitions. #[tokio::test(flavor = "multi_thread")] async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { init_test_tracing(); @@ -202,6 +204,7 @@ async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { assert_eq!(parent_inserts.len(), 1); } +/// Dropping a child partition must not emit DELETE/TRUNCATE events. #[tokio::test(flavor = "multi_thread")] async fn partition_drop_does_not_emit_delete_or_truncate() { init_test_tracing(); @@ -298,3 +301,415 @@ async fn partition_drop_does_not_emit_delete_or_truncate() { "Partition drop must not emit TRUNCATE events" ); } + +/// When a partition is detached from a partitioned table with an explicit publication, +/// inserts into the detached partition should NOT be replicated since only the parent +/// table is in the publication. 
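+///
+/// A sketch of the detach step (table and schema names assumed):
+///
+/// ```sql
+/// alter table public.partitioned_events_detach
+///     detach partition public.partitioned_events_detach_p1;
+///
+/// -- Now a standalone table outside the publication: this insert is not decoded.
+/// insert into public.partitioned_events_detach_p1 (data, partition_key)
+///     values ('detached_event', 25);
+/// ```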
+#[tokio::test(flavor = "multi_thread")] +async fn partition_detach_with_explicit_publication_does_not_replicate_detached_inserts() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("partitioned_events_detach"); + let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; + + let (parent_table_id, partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + let p1_table_id = partition_table_ids[0]; + + // Insert initial data into both partitions + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Create explicit publication for parent table only + let publication_name = "test_partitioned_pub_detach".to_string(); + database + .create_publication(&publication_name, std::slice::from_ref(&table_name)) + .await + .expect("Failed to create publication"); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name.clone(), + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + parent_sync_done.notified().await; + + // Verify initial sync copied both rows + let table_rows = destination.get_table_rows().await; + assert_eq!(table_rows.len(), 1); + let parent_rows: usize = table_rows + .get(&parent_table_id) + .map(|rows| rows.len()) + .unwrap_or(0); + assert_eq!( + parent_rows, 2, + "Parent table should have 2 rows from initial COPY" + ); + + // Detach partition p1 from parent + let child_p1_name = format!("{}_{}", table_name.name, "p1"); + let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + database + .run_sql(&format!( + "alter table {} detach partition {}", + table_name.as_quoted_identifier(), + child_p1_qualified + )) + .await + .unwrap(); + + // Insert into the detached partition (should NOT be replicated) + database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('detached_event', 25)", + child_p1_qualified + )) + .await + .unwrap(); + + // Insert into parent table (should be replicated to remaining partition p2) + database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('parent_event', 125)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Wait for the parent table insert to be replicated + let inserts_notify = destination + .wait_for_events_count(vec![(EventType::Insert, 1)]) + .await; + inserts_notify.notified().await; + + let _ = pipeline.shutdown_and_wait().await; + + // Verify events + let events = destination.get_events().await; + let grouped = group_events_by_type_and_table_id(&events); + + // Parent table should have 1 insert event (the insert after detachment) + let parent_inserts = grouped + .get(&(EventType::Insert, parent_table_id)) + .cloned() + .unwrap_or_default(); + assert_eq!( + parent_inserts.len(), + 1, + "Parent table should have exactly 1 CDC insert event" + ); + + // Detached partition should have NO insert events + let detached_inserts = grouped + .get(&(EventType::Insert, p1_table_id)) + .cloned() + 
.unwrap_or_default(); + assert_eq!( + detached_inserts.len(), + 0, + "Detached partition inserts should NOT be replicated" + ); +} + +// TODO: validate. +/// When a partition is detached from a partitioned table with FOR ALL TABLES publication, +/// the detached partition becomes a standalone table. However, the running pipeline won't +/// automatically discover it without re-scanning. This test validates the catalog state. +#[tokio::test(flavor = "multi_thread")] +async fn partition_detach_with_all_tables_publication_catalog_state() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("partitioned_events_all_tables"); + let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; + + let (parent_table_id, partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + let p1_table_id = partition_table_ids[0]; + + // Insert initial data + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Create FOR ALL TABLES publication + let publication_name = "test_all_tables_pub_detach".to_string(); + database + .run_sql(&format!( + "create publication {} for all tables with (publish_via_partition_root = true)", + publication_name + )) + .await + .unwrap(); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name.clone(), + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + parent_sync_done.notified().await; + + // Verify initial state: only parent table is tracked + let table_states_before = state_store.get_table_replication_states().await; + assert!( + table_states_before.contains_key(&parent_table_id), + "Parent table should be tracked before detachment" + ); + assert!( + !table_states_before.contains_key(&p1_table_id), + "Child partition p1 should NOT be tracked separately before detachment" + ); + + // Detach partition p1 + let child_p1_name = format!("{}_{}", table_name.name, "p1"); + let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + database + .run_sql(&format!( + "alter table {} detach partition {}", + table_name.as_quoted_identifier(), + child_p1_qualified + )) + .await + .unwrap(); + + // Verify catalog state: detached partition is now a standalone table + // Check pg_inherits - should no longer have parent relationship + let inherits_check = database + .client + .as_ref() + .unwrap() + .query( + "select count(*) as cnt from pg_inherits where inhrelid = $1", + &[&p1_table_id.0], + ) + .await + .unwrap(); + let inherits_count: i64 = inherits_check[0].get("cnt"); + assert_eq!( + inherits_count, 0, + "Detached partition should have no parent in pg_inherits" + ); + + // Check pg_publication_tables - with FOR ALL TABLES, detached partition should appear + let pub_tables_check = database + .client + .as_ref() + .unwrap() + .query( + "select count(*) as cnt from pg_publication_tables + where pubname = $1 and tablename = $2", + &[&publication_name, &child_p1_name], + ) + .await + .unwrap(); + let 
pub_tables_count: i64 = pub_tables_check[0].get("cnt"); + assert_eq!( + pub_tables_count, 1, + "Detached partition should appear in pg_publication_tables for ALL TABLES publication" + ); + + // Insert into detached partition + database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('detached_event', 25)", + child_p1_qualified + )) + .await + .unwrap(); + + // Note: The running pipeline won't automatically discover the detached partition + // without re-scanning for new tables. This is expected behavior - table discovery + // happens at pipeline start or explicit refresh. + + let _ = pipeline.shutdown_and_wait().await; + + // The pipeline state should still only track the parent table (not the detached partition) + // because it hasn't re-scanned for new tables + let table_states_after = state_store.get_table_replication_states().await; + assert!( + table_states_after.contains_key(&parent_table_id), + "Parent table should still be tracked after detachment" + ); + + // The detached partition insert should NOT be replicated in this pipeline run + // because the pipeline hasn't discovered it as a new table + let events = destination.get_events().await; + let grouped = group_events_by_type_and_table_id(&events); + let detached_inserts = grouped + .get(&(EventType::Insert, p1_table_id)) + .cloned() + .unwrap_or_default(); + assert_eq!( + detached_inserts.len(), + 0, + "Detached partition inserts should NOT be replicated without table re-discovery" + ); +} + + +// TODO: validate. +/// When a partition is detached and then the pipeline restarts (simulating table re-discovery), +/// the detached partition should be discovered as a new standalone table if using FOR ALL TABLES. +#[tokio::test(flavor = "multi_thread")] +async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_table() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("partitioned_events_restart"); + let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; + + let (parent_table_id, partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + let p1_table_id = partition_table_ids[0]; + + // Insert initial data + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Create FOR ALL TABLES publication + let publication_name = "test_all_tables_restart".to_string(); + database + .run_sql(&format!( + "create publication {} for all tables with (publish_via_partition_root = true)", + publication_name + )) + .await + .unwrap(); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + // Start pipeline and wait for initial sync + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name.clone(), + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + parent_sync_done.notified().await; + + // Shutdown the first pipeline + let _ = pipeline.shutdown_and_wait().await; + + // Detach partition p1 + let child_p1_name = format!("{}_{}", table_name.name, "p1"); + let child_p1_qualified = 
format!("{}.{}", table_name.schema, child_p1_name); + database + .run_sql(&format!( + "alter table {} detach partition {}", + table_name.as_quoted_identifier(), + child_p1_qualified + )) + .await + .unwrap(); + + // Insert into detached partition (while pipeline is stopped) + database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('detached_event', 25)", + child_p1_qualified + )) + .await + .unwrap(); + + // Restart the pipeline - it should now discover the detached partition as a new table + let state_store2 = NotifyingStore::new(); + let destination2 = TestDestinationWrapper::wrap(MemoryDestination::new()); + + let detached_sync_done = state_store2 + .notify_on_table_state_type(p1_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id2: PipelineId = random(); + let mut pipeline2 = create_pipeline( + &database.config, + pipeline_id2, + publication_name.clone(), + state_store2.clone(), + destination2.clone(), + ); + + pipeline2.start().await.unwrap(); + + // Wait for detached partition to be synced + detached_sync_done.notified().await; + + let _ = pipeline2.shutdown_and_wait().await; + + // Verify the detached partition was discovered and synced + let table_states = state_store2.get_table_replication_states().await; + assert!( + table_states.contains_key(&p1_table_id), + "Detached partition should be discovered as a standalone table after restart" + ); + + // Verify the data from the detached partition was copied + let table_rows = destination2.get_table_rows().await; + let detached_rows: usize = table_rows + .get(&p1_table_id) + .map(|rows| rows.len()) + .unwrap_or(0); + assert!( + detached_rows > 0, + "Detached partition should have rows synced after pipeline restart" + ); +} From f2378fffe323cf72a598d65842261f259539c8ea Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 21 Oct 2025 16:20:45 +0200 Subject: [PATCH 06/26] Improve --- etl/tests/pipeline_with_partitioned_table.rs | 29 ++++++++++---------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 790f07e89..b2d1acecf 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -13,7 +13,8 @@ use etl::types::PipelineId; use etl_telemetry::tracing::init_test_tracing; use rand::random; -/// The initial copy for a partitioned table (published via root) copies all existing rows. +/// Tests that initial COPY replicates all rows from a partitioned table. +/// Only the parent table is tracked, not individual child partitions. #[tokio::test(flavor = "multi_thread")] async fn partitioned_table_copy_replicates_existing_data() { init_test_tracing(); @@ -100,7 +101,8 @@ async fn partitioned_table_copy_replicates_existing_data() { ); } -/// The initial copy completes and CDC streams new rows from newly added partitions. +/// Tests that CDC streams inserts to partitions created after pipeline startup. +/// New partitions are automatically included without publication changes. #[tokio::test(flavor = "multi_thread")] async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { init_test_tracing(); @@ -204,7 +206,8 @@ async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { assert_eq!(parent_inserts.len(), 1); } -/// Dropping a child partition must not emit DELETE/TRUNCATE events. +/// Tests that detaching and dropping a partition does not emit DELETE or TRUNCATE events. 
+/// Partition management is a DDL operation, not DML, so no data events should be generated. #[tokio::test(flavor = "multi_thread")] async fn partition_drop_does_not_emit_delete_or_truncate() { init_test_tracing(); @@ -302,9 +305,9 @@ async fn partition_drop_does_not_emit_delete_or_truncate() { ); } -/// When a partition is detached from a partitioned table with an explicit publication, -/// inserts into the detached partition should NOT be replicated since only the parent -/// table is in the publication. +/// Tests that detached partitions are not replicated with explicit publications. +/// Once detached, the partition becomes independent and is not in the publication since +/// only the parent table was explicitly added. Inserts to detached partitions are not replicated. #[tokio::test(flavor = "multi_thread")] async fn partition_detach_with_explicit_publication_does_not_replicate_detached_inserts() { init_test_tracing(); @@ -433,10 +436,9 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ ); } -// TODO: validate. -/// When a partition is detached from a partitioned table with FOR ALL TABLES publication, -/// the detached partition becomes a standalone table. However, the running pipeline won't -/// automatically discover it without re-scanning. This test validates the catalog state. +/// Tests catalog state when a partition is detached with FOR ALL TABLES publication. +/// The detached partition appears in pg_publication_tables but is not automatically discovered +/// by the running pipeline. Table discovery only happens at pipeline startup, not during execution. #[tokio::test(flavor = "multi_thread")] async fn partition_detach_with_all_tables_publication_catalog_state() { init_test_tracing(); @@ -588,10 +590,9 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { ); } - -// TODO: validate. -/// When a partition is detached and then the pipeline restarts (simulating table re-discovery), -/// the detached partition should be discovered as a new standalone table if using FOR ALL TABLES. +/// Tests that a detached partition is discovered as a new table after pipeline restart. +/// With FOR ALL TABLES publication, the detached partition is re-discovered during table +/// scanning at startup and its data is replicated. #[tokio::test(flavor = "multi_thread")] async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_table() { init_test_tracing(); From ac65190e6c9d025dd32e452dbd56e3e37b852bbf Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 09:22:47 +0200 Subject: [PATCH 07/26] Improve --- etl/tests/pipeline_with_partitioned_table.rs | 61 ++++++++++---------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index b2d1acecf..cb2e89df1 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -266,7 +266,7 @@ async fn partition_drop_does_not_emit_delete_or_truncate() { .map(|v| v.len()) .unwrap_or(0); - // Detach and drop one child partition (DDL should not generate DML events) + // Detach and drop one child partition (DDL should not generate DML events). 
let child_p1_name = format!("{}_{}", table_name.name, "p1"); let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); database @@ -323,7 +323,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ let p1_table_id = partition_table_ids[0]; - // Insert initial data into both partitions + // Insert initial data into both partitions. database .run_sql(&format!( "insert into {} (data, partition_key) values \ @@ -333,7 +333,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ .await .unwrap(); - // Create explicit publication for parent table only + // Create explicit publication for parent table only. let publication_name = "test_partitioned_pub_detach".to_string(); database .create_publication(&publication_name, std::slice::from_ref(&table_name)) @@ -359,7 +359,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ pipeline.start().await.unwrap(); parent_sync_done.notified().await; - // Verify initial sync copied both rows + // Verify initial sync copied both rows. let table_rows = destination.get_table_rows().await; assert_eq!(table_rows.len(), 1); let parent_rows: usize = table_rows @@ -371,7 +371,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ "Parent table should have 2 rows from initial COPY" ); - // Detach partition p1 from parent + // Detach partition p1 from parent. let child_p1_name = format!("{}_{}", table_name.name, "p1"); let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); database @@ -383,7 +383,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ .await .unwrap(); - // Insert into the detached partition (should NOT be replicated) + // Insert into the detached partition (should NOT be replicated). database .run_sql(&format!( "insert into {} (data, partition_key) values ('detached_event', 25)", @@ -392,7 +392,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ .await .unwrap(); - // Insert into parent table (should be replicated to remaining partition p2) + // Insert into the parent table (should be replicated to remaining partition p2). database .run_sql(&format!( "insert into {} (data, partition_key) values ('parent_event', 125)", @@ -401,7 +401,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ .await .unwrap(); - // Wait for the parent table insert to be replicated + // Wait for the parent table insert to be replicated. let inserts_notify = destination .wait_for_events_count(vec![(EventType::Insert, 1)]) .await; @@ -413,7 +413,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ let events = destination.get_events().await; let grouped = group_events_by_type_and_table_id(&events); - // Parent table should have 1 insert event (the insert after detachment) + // Parent table should have 1 insert event (the insert after detachment). let parent_inserts = grouped .get(&(EventType::Insert, parent_table_id)) .cloned() @@ -424,7 +424,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ "Parent table should have exactly 1 CDC insert event" ); - // Detached partition should have NO insert events + // Detached partition should have NO insert events. 
let detached_inserts = grouped .get(&(EventType::Insert, p1_table_id)) .cloned() @@ -454,7 +454,7 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { let p1_table_id = partition_table_ids[0]; - // Insert initial data + // Insert initial data. database .run_sql(&format!( "insert into {} (data, partition_key) values \ @@ -464,7 +464,7 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { .await .unwrap(); - // Create FOR ALL TABLES publication + // Create FOR ALL TABLES publication. let publication_name = "test_all_tables_pub_detach".to_string(); database .run_sql(&format!( @@ -493,7 +493,7 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { pipeline.start().await.unwrap(); parent_sync_done.notified().await; - // Verify initial state: only parent table is tracked + // Verify the initial state. The parent table is the only table tracked. let table_states_before = state_store.get_table_replication_states().await; assert!( table_states_before.contains_key(&parent_table_id), @@ -504,7 +504,7 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { "Child partition p1 should NOT be tracked separately before detachment" ); - // Detach partition p1 + // Detach partition p1. let child_p1_name = format!("{}_{}", table_name.name, "p1"); let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); database @@ -516,8 +516,7 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { .await .unwrap(); - // Verify catalog state: detached partition is now a standalone table - // Check pg_inherits - should no longer have parent relationship + // Verify catalog state. The detached partition is now a standalone table. let inherits_check = database .client .as_ref() @@ -534,7 +533,7 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { "Detached partition should have no parent in pg_inherits" ); - // Check pg_publication_tables - with FOR ALL TABLES, detached partition should appear + // Check pg_publication_tables. With FOR ALL TABLES, the detached partition should appear. let pub_tables_check = database .client .as_ref() @@ -552,7 +551,7 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { "Detached partition should appear in pg_publication_tables for ALL TABLES publication" ); - // Insert into detached partition + // Insert into detached partition. database .run_sql(&format!( "insert into {} (data, partition_key) values ('detached_event', 25)", @@ -562,13 +561,13 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { .unwrap(); // Note: The running pipeline won't automatically discover the detached partition - // without re-scanning for new tables. This is expected behavior - table discovery + // without re-scanning for new tables. This is expected behavior, the table discovery // happens at pipeline start or explicit refresh. let _ = pipeline.shutdown_and_wait().await; // The pipeline state should still only track the parent table (not the detached partition) - // because it hasn't re-scanned for new tables + // because it hasn't re-scanned for new tables. 
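The comment above contrasts two kinds of state: the Postgres catalogs update immediately on DETACH, while the pipeline's tracked table set is fixed at startup. The catalog side can be inspected directly, for example with the publication from this test:

```sql
-- Under FOR ALL TABLES the detached partition shows up right away as a
-- standalone entry, next to the parent that is still published via root.
select schemaname, tablename
from pg_publication_tables
where pubname = 'test_all_tables_pub_detach'
order by tablename;
```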
let table_states_after = state_store.get_table_replication_states().await; assert!( table_states_after.contains_key(&parent_table_id), @@ -576,7 +575,7 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { ); // The detached partition insert should NOT be replicated in this pipeline run - // because the pipeline hasn't discovered it as a new table + // because the pipeline hasn't discovered it as a new table. let events = destination.get_events().await; let grouped = group_events_by_type_and_table_id(&events); let detached_inserts = grouped @@ -608,7 +607,7 @@ async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_tab let p1_table_id = partition_table_ids[0]; - // Insert initial data + // Insert initial data. database .run_sql(&format!( "insert into {} (data, partition_key) values \ @@ -618,7 +617,7 @@ async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_tab .await .unwrap(); - // Create FOR ALL TABLES publication + // Create FOR ALL TABLES publication. let publication_name = "test_all_tables_restart".to_string(); database .run_sql(&format!( @@ -631,7 +630,7 @@ async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_tab let state_store = NotifyingStore::new(); let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); - // Start pipeline and wait for initial sync + // Start pipeline and wait for initial sync. let parent_sync_done = state_store .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) .await; @@ -648,10 +647,10 @@ async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_tab pipeline.start().await.unwrap(); parent_sync_done.notified().await; - // Shutdown the first pipeline + // Shutdown the first pipeline. let _ = pipeline.shutdown_and_wait().await; - // Detach partition p1 + // Detach partition p1. let child_p1_name = format!("{}_{}", table_name.name, "p1"); let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); database @@ -663,7 +662,7 @@ async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_tab .await .unwrap(); - // Insert into detached partition (while pipeline is stopped) + // Insert into detached partition (while pipeline is stopped). database .run_sql(&format!( "insert into {} (data, partition_key) values ('detached_event', 25)", @@ -672,7 +671,7 @@ async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_tab .await .unwrap(); - // Restart the pipeline - it should now discover the detached partition as a new table + // Restart the pipeline. It should now discover the detached partition as a new table. let state_store2 = NotifyingStore::new(); let destination2 = TestDestinationWrapper::wrap(MemoryDestination::new()); @@ -691,19 +690,19 @@ async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_tab pipeline2.start().await.unwrap(); - // Wait for detached partition to be synced + // Wait for the detached partition to be synced. detached_sync_done.notified().await; let _ = pipeline2.shutdown_and_wait().await; - // Verify the detached partition was discovered and synced + // Verify the detached partition was discovered and synced. 
let table_states = state_store2.get_table_replication_states().await; assert!( table_states.contains_key(&p1_table_id), "Detached partition should be discovered as a standalone table after restart" ); - // Verify the data from the detached partition was copied + // Verify the data from the detached partition was copied. let table_rows = destination2.get_table_rows().await; let detached_rows: usize = table_rows .get(&p1_table_id) From 8d553c38b9cb73b2ab399ab67299073501993f4e Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 10:35:56 +0200 Subject: [PATCH 08/26] Improve --- etl/src/replication/client.rs | 8 +-- etl/tests/pipeline_with_partitioned_table.rs | 51 +++++++++++++------- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index 648a5686a..bf594a4fc 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -720,7 +720,8 @@ impl PgReplicationClient { when 0 then true else (a.attnum in (select attnum from pub_attrs)) end - )".to_string(), + )" + .to_string(), }; } @@ -750,8 +751,7 @@ impl PgReplicationClient { publication: Option<&str>, ) -> EtlResult> { // Build publication filter CTEs and predicates based on Postgres version. - let publication_filter = - self.build_publication_filter_sql(table_id, publication); + let publication_filter = self.build_publication_filter_sql(table_id, publication); let column_info_query = format!( r#" @@ -804,7 +804,7 @@ impl PgReplicationClient { publication_ctes = publication_filter.ctes, publication_predicate = publication_filter.predicate, ); - + let mut column_schemas = vec![]; for message in self.client.simple_query(&column_info_query).await? { if let SimpleQueryMessage::Row(row) = message { diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index cb2e89df1..fa572dcac 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -647,8 +647,16 @@ async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_tab pipeline.start().await.unwrap(); parent_sync_done.notified().await; - // Shutdown the first pipeline. - let _ = pipeline.shutdown_and_wait().await; + // Verify the initial state. The parent table is the only table tracked. + let table_states_before = state_store.get_table_replication_states().await; + assert!( + table_states_before.contains_key(&parent_table_id), + "Parent table should be tracked before detachment" + ); + assert!( + !table_states_before.contains_key(&p1_table_id), + "Child partition p1 should NOT be tracked separately before detachment" + ); // Detach partition p1. let child_p1_name = format!("{}_{}", table_name.name, "p1"); @@ -671,45 +679,52 @@ async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_tab .await .unwrap(); - // Restart the pipeline. It should now discover the detached partition as a new table. - let state_store2 = NotifyingStore::new(); - let destination2 = TestDestinationWrapper::wrap(MemoryDestination::new()); + // Shutdown the pipeline. + let _ = pipeline.shutdown_and_wait().await; - let detached_sync_done = state_store2 + // Restart the pipeline. It should now discover the detached partition as a new table. 
+    let detached_sync_done = state_store
         .notify_on_table_state_type(p1_table_id, TableReplicationPhaseType::SyncDone)
         .await;

-    let pipeline_id2: PipelineId = random();
-    let mut pipeline2 = create_pipeline(
+    let mut pipeline = create_pipeline(
         &database.config,
-        pipeline_id2,
+        pipeline_id,
         publication_name.clone(),
-        state_store2.clone(),
-        destination2.clone(),
+        state_store.clone(),
+        destination.clone(),
     );

-    pipeline2.start().await.unwrap();
+    pipeline.start().await.unwrap();

     // Wait for the detached partition to be synced.
     detached_sync_done.notified().await;

-    let _ = pipeline2.shutdown_and_wait().await;
+    let _ = pipeline.shutdown_and_wait().await;

     // Verify the detached partition was discovered and synced.
-    let table_states = state_store2.get_table_replication_states().await;
+    let table_states_after = state_store.get_table_replication_states().await;
     assert!(
-        table_states.contains_key(&p1_table_id),
+        table_states_after.contains_key(&p1_table_id),
         "Detached partition should be discovered as a standalone table after restart"
     );

     // Verify the data from the detached partition was copied.
-    let table_rows = destination2.get_table_rows().await;
+    let table_rows = destination.get_table_rows().await;
+    let parent_rows: usize = table_rows
+        .get(&parent_table_id)
+        .map(|rows| rows.len())
+        .unwrap_or(0);
+    assert_eq!(
+        parent_rows, 2,
+        "The parent table should have the initial rows"
+    );
     let detached_rows: usize = table_rows
         .get(&p1_table_id)
         .map(|rows| rows.len())
         .unwrap_or(0);
-    assert!(
-        detached_rows > 0,
+    assert_eq!(
+        detached_rows, 2,
         "Detached partition should have rows synced after pipeline restart"
     );
 }
From 319273b63e8939716770bdc94c57ddab1d387d9f Mon Sep 17 00:00:00 2001
From: Riccardo Busetti
Date: Fri, 24 Oct 2025 10:43:32 +0200
Subject: [PATCH 09/26] Improve

---
 etl/tests/pipeline_with_partitioned_table.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs
index fa572dcac..620476b1c 100644
--- a/etl/tests/pipeline_with_partitioned_table.rs
+++ b/etl/tests/pipeline_with_partitioned_table.rs
@@ -440,7 +440,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_
 /// The detached partition appears in pg_publication_tables but is not automatically discovered
 /// by the running pipeline. Table discovery only happens at pipeline startup, not during execution.
 #[tokio::test(flavor = "multi_thread")]
-async fn partition_detach_with_all_tables_publication_catalog_state() {
+async fn partition_detach_with_all_tables_publication_does_not_replicate_detached_inserts() {
     init_test_tracing();
     let database = spawn_source_database().await;

@@ -593,7 +593,7 @@ async fn partition_detach_with_all_tables_publication_catalog_state()
 /// With FOR ALL TABLES publication, the detached partition is re-discovered during table
 /// scanning at startup and its data is replicated.
#[tokio::test(flavor = "multi_thread")] -async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_table() { +async fn partition_detach_with_all_tables_publication_does_replicate_detached_inserts_on_restart() { init_test_tracing(); let database = spawn_source_database().await; From 29f1f41cc401b55059f177b4ce7f2ca26026e353 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 11:29:06 +0200 Subject: [PATCH 10/26] Improve --- etl/src/replication/client.rs | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index bf594a4fc..da36cc10d 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -642,6 +642,7 @@ impl PgReplicationClient { ) -> EtlResult { let table_name = self.get_table_name(table_id).await?; let column_schemas = self.get_column_schemas(table_id, publication).await?; + warn!("COLUMNS SCHEMAS FOR TABLE {:?}: {:?}", table_name, column_schemas); Ok(TableSchema { name: table_name, @@ -706,20 +707,27 @@ impl PgReplicationClient { { return PublicationFilter { ctes: format!( - "pub_attrs as ( - select unnest(r.prattrs) as attnum - from pg_publication_rel r - join pg_publication p on r.prpubid = p.oid + "pub_info as ( + select p.puballtables, r.prattrs + from pg_publication p + left join pg_publication_rel r on r.prpubid = p.oid and r.prrelid = {table_id} where p.pubname = {publication} - and r.prrelid = {table_id} + ), + pub_attrs as ( + select unnest(prattrs) as attnum + from pub_info + where prattrs is not null ),", publication = quote_literal(publication_name), ), predicate: "and ( - case (select count(*) from pub_attrs) - when 0 then true - else (a.attnum in (select attnum from pub_attrs)) - end + (select puballtables from pub_info) = true + or ( + case (select count(*) from pub_attrs) + when 0 then true + else (a.attnum in (select attnum from pub_attrs)) + end + ) )" .to_string(), }; @@ -728,7 +736,12 @@ impl PgReplicationClient { // Postgres 14 and earlier: table-level filtering only PublicationFilter { ctes: format!( - "pub_table as ( + "pub_info as ( + select p.puballtables + from pg_publication p + where p.pubname = {publication} + ), + pub_table as ( select 1 as exists_in_pub from pg_publication_rel r join pg_publication p on r.prpubid = p.oid @@ -737,7 +750,7 @@ impl PgReplicationClient { ),", publication = quote_literal(publication_name), ), - predicate: "and (select count(*) from pub_table) > 0".to_string(), + predicate: "and ((select puballtables from pub_info) = true or (select count(*) from pub_table) > 0)".to_string(), } } From 9a4d1fc0d96c0dd08bc675f2bbb434c3381051fc Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 13:26:14 +0200 Subject: [PATCH 11/26] Improve --- etl-postgres/src/tokio/test_utils.rs | 6 +- etl/src/replication/client.rs | 13 +- etl/tests/pipeline_with_partitioned_table.rs | 414 ++++++++++++++++++- 3 files changed, 419 insertions(+), 14 deletions(-) diff --git a/etl-postgres/src/tokio/test_utils.rs b/etl-postgres/src/tokio/test_utils.rs index 7aad2b62b..8a0d52601 100644 --- a/etl-postgres/src/tokio/test_utils.rs +++ b/etl-postgres/src/tokio/test_utils.rs @@ -87,9 +87,9 @@ impl PgDatabase { // PostgreSQL 15+ supports FOR ALL TABLES IN SCHEMA syntax let create_publication_query = match schema { Some(schema_name) => format!( - "create publication {publication_name} for tables in schema {schema_name}" + "create publication {publication_name} for tables in schema 
{schema_name} with (publish_via_partition_root = true)" ), - None => format!("create publication {publication_name} for all tables"), + None => format!("create publication {publication_name} for all tables with (publish_via_partition_root = true)"), }; client.execute(&create_publication_query, &[]).await?; @@ -116,7 +116,7 @@ impl PgDatabase { } None => { let create_publication_query = - format!("create publication {publication_name} for all tables"); + format!("create publication {publication_name} for all tables with (publish_via_partition_root = true)"); client.execute(&create_publication_query, &[]).await?; } } diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index da36cc10d..5a03e0d6c 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -434,7 +434,7 @@ impl PgReplicationClient { union all - -- Get tables from pg_publication_tables (for ALL TABLES publications) + -- Get tables from pg_publication_tables (for ALL TABLES and FOR TABLES IN SCHEMA) -- Only executes if pg_publication_rel is empty for this publication select c.oid from pg_publication_tables pt @@ -642,7 +642,6 @@ impl PgReplicationClient { ) -> EtlResult { let table_name = self.get_table_name(table_id).await?; let column_schemas = self.get_column_schemas(table_id, publication).await?; - warn!("COLUMNS SCHEMAS FOR TABLE {:?}: {:?}", table_name, column_schemas); Ok(TableSchema { name: table_name, @@ -708,7 +707,7 @@ impl PgReplicationClient { return PublicationFilter { ctes: format!( "pub_info as ( - select p.puballtables, r.prattrs + select p.oid as puboid, p.puballtables, r.prattrs from pg_publication p left join pg_publication_rel r on r.prpubid = p.oid and r.prrelid = {table_id} where p.pubname = {publication} @@ -717,11 +716,19 @@ impl PgReplicationClient { select unnest(prattrs) as attnum from pub_info where prattrs is not null + ), + pub_schema as ( + select 1 as exists_in_schema_pub + from pub_info + join pg_publication_namespace pn on pn.pnpubid = pub_info.puboid + join pg_class c on c.relnamespace = pn.pnnspid + where c.oid = {table_id} ),", publication = quote_literal(publication_name), ), predicate: "and ( (select puballtables from pub_info) = true + or (select count(*) from pub_schema) > 0 or ( case (select count(*) from pub_attrs) when 0 then true diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 620476b1c..d1ab3c5f0 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -467,10 +467,7 @@ async fn partition_detach_with_all_tables_publication_does_not_replicate_detache // Create FOR ALL TABLES publication. let publication_name = "test_all_tables_pub_detach".to_string(); database - .run_sql(&format!( - "create publication {} for all tables with (publish_via_partition_root = true)", - publication_name - )) + .create_publication_for_all(&publication_name, None) .await .unwrap(); @@ -620,10 +617,7 @@ async fn partition_detach_with_all_tables_publication_does_replicate_detached_in // Create FOR ALL TABLES publication. 
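The pub_schema CTE added above covers the third membership mechanism: FOR TABLES IN SCHEMA publications (PostgreSQL 15+) are recorded in pg_publication_namespace, so neither puballtables nor pg_publication_rel.prattrs can see them. A sketch of the catalog relationship the CTE walks:

```sql
-- A table is covered by a schema-level publication when its namespace
-- appears in pg_publication_namespace (PostgreSQL 15+).
select p.pubname, n.nspname as published_schema
from pg_publication_namespace pn
join pg_publication p on p.oid = pn.pnpubid
join pg_namespace n on n.oid = pn.pnnspid;
```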
let publication_name = "test_all_tables_restart".to_string(); database - .run_sql(&format!( - "create publication {} for all tables with (publish_via_partition_root = true)", - publication_name - )) + .create_publication_for_all(&publication_name, None) .await .unwrap(); @@ -728,3 +722,407 @@ async fn partition_detach_with_all_tables_publication_does_replicate_detached_in "Detached partition should have rows synced after pipeline restart" ); } + +/// Tests that partitioned tables replicate correctly with FOR TABLES IN SCHEMA publication. +/// The parent table in the schema should be tracked and all data should be replicated. +/// Requires PostgreSQL 15+ for FOR TABLES IN SCHEMA support. +#[tokio::test(flavor = "multi_thread")] +async fn partitioned_table_with_schema_publication_replicates_data() { + init_test_tracing(); + let database = spawn_source_database().await; + + // Skip test if PostgreSQL version is < 15 (FOR TABLES IN SCHEMA requires 15+). + if let Some(version) = database.server_version() { + if version.get() < 150000 { + eprintln!("Skipping test: PostgreSQL 15+ required for FOR TABLES IN SCHEMA"); + return; + } + } + + let table_name = test_table_name("partitioned_events_schema"); + let partition_specs = [ + ("p1", "from (1) to (100)"), + ("p2", "from (100) to (200)"), + ("p3", "from (200) to (300)"), + ]; + + let (parent_table_id, _partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150), ('event3', 250)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Create FOR TABLES IN SCHEMA publication. + let publication_name = "test_schema_pub".to_string(); + database + .create_publication_for_all(&publication_name, Some(&table_name.schema)) + .await + .expect("Failed to create publication"); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name, + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + parent_sync_done.notified().await; + + let _ = pipeline.shutdown_and_wait().await; + + let table_rows = destination.get_table_rows().await; + let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum(); + + assert_eq!( + total_rows, 3, + "Expected 3 rows synced from schema publication, but got {total_rows}" + ); + + let table_states = state_store.get_table_replication_states().await; + + assert!( + table_states.contains_key(&parent_table_id), + "Parent table should be tracked in state" + ); + assert_eq!( + table_states.len(), + 1, + "Only the parent table should be tracked in state" + ); + + let parent_table_rows = table_rows + .iter() + .filter(|(table_id, _)| **table_id == parent_table_id) + .map(|(_, rows)| rows.len()) + .sum::(); + assert_eq!( + parent_table_rows, 3, + "Parent table should contain all rows from schema publication" + ); +} + +/// Tests that detached partitions are not automatically discovered with FOR TABLES IN SCHEMA publication. 
+/// Similar to FOR ALL TABLES, the detached partition appears in pg_publication_tables but is not +/// automatically discovered by the running pipeline without restart. +/// Requires PostgreSQL 15+ for FOR TABLES IN SCHEMA support. +#[tokio::test(flavor = "multi_thread")] +async fn partition_detach_with_schema_publication_does_not_replicate_detached_inserts() { + init_test_tracing(); + let database = spawn_source_database().await; + + // Skip test if PostgreSQL version is < 15 (FOR TABLES IN SCHEMA requires 15+). + if let Some(version) = database.server_version() { + if version.get() < 150000 { + eprintln!("Skipping test: PostgreSQL 15+ required for FOR TABLES IN SCHEMA"); + return; + } + } + + let table_name = test_table_name("partitioned_events_schema_detach"); + let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; + + let (parent_table_id, partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + let p1_table_id = partition_table_ids[0]; + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Create FOR TABLES IN SCHEMA publication. + let publication_name = "test_schema_pub_detach".to_string(); + database + .create_publication_for_all(&publication_name, Some(&table_name.schema)) + .await + .unwrap(); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name.clone(), + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + parent_sync_done.notified().await; + + // Verify initial state. + let table_states_before = state_store.get_table_replication_states().await; + assert!( + table_states_before.contains_key(&parent_table_id), + "Parent table should be tracked before detachment" + ); + assert!( + !table_states_before.contains_key(&p1_table_id), + "Child partition p1 should NOT be tracked separately before detachment" + ); + + // Detach partition p1. + let child_p1_name = format!("{}_{}", table_name.name, "p1"); + let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + database + .run_sql(&format!( + "alter table {} detach partition {}", + table_name.as_quoted_identifier(), + child_p1_qualified + )) + .await + .unwrap(); + + // Verify catalog state. The detached partition should appear in pg_publication_tables. + let pub_tables_check = database + .client + .as_ref() + .unwrap() + .query( + "select count(*) as cnt from pg_publication_tables + where pubname = $1 and tablename = $2", + &[&publication_name, &child_p1_name], + ) + .await + .unwrap(); + let pub_tables_count: i64 = pub_tables_check[0].get("cnt"); + assert_eq!( + pub_tables_count, 1, + "Detached partition should appear in pg_publication_tables for TABLES IN SCHEMA publication" + ); + + // Insert into detached partition. + database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('detached_event', 25)", + child_p1_qualified + )) + .await + .unwrap(); + + // Insert into parent table (should be replicated). 
+ database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('parent_event', 125)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Wait for the parent table insert to be replicated. + let inserts_notify = destination + .wait_for_events_count(vec![(EventType::Insert, 1)]) + .await; + inserts_notify.notified().await; + + let _ = pipeline.shutdown_and_wait().await; + + // The pipeline state should still only track the parent table. + let table_states_after = state_store.get_table_replication_states().await; + assert!( + table_states_after.contains_key(&parent_table_id), + "Parent table should still be tracked after detachment" + ); + + // Verify events. + let events = destination.get_events().await; + let grouped = group_events_by_type_and_table_id(&events); + + // Parent table should have 1 insert event. + let parent_inserts = grouped + .get(&(EventType::Insert, parent_table_id)) + .cloned() + .unwrap_or_default(); + assert_eq!( + parent_inserts.len(), + 1, + "Parent table should have exactly 1 CDC insert event" + ); + + // Detached partition inserts should NOT be replicated without table re-discovery. + let detached_inserts = grouped + .get(&(EventType::Insert, p1_table_id)) + .cloned() + .unwrap_or_default(); + assert_eq!( + detached_inserts.len(), + 0, + "Detached partition inserts should NOT be replicated without table re-discovery" + ); +} + +/// Tests that a detached partition is discovered as a new table after pipeline restart +/// with FOR TABLES IN SCHEMA publication. After restart, the detached partition in the same +/// schema should be discovered and its data replicated. +/// Requires PostgreSQL 15+ for FOR TABLES IN SCHEMA support. +#[tokio::test(flavor = "multi_thread")] +async fn partition_detach_with_schema_publication_does_replicate_detached_inserts_on_restart() { + init_test_tracing(); + let database = spawn_source_database().await; + + // Skip test if PostgreSQL version is < 15 (FOR TABLES IN SCHEMA requires 15+). + if let Some(version) = database.server_version() { + if version.get() < 150000 { + eprintln!("Skipping test: PostgreSQL 15+ required for FOR TABLES IN SCHEMA"); + return; + } + } + + let table_name = test_table_name("partitioned_events_schema_restart"); + let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; + + let (parent_table_id, partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + let p1_table_id = partition_table_ids[0]; + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Create FOR TABLES IN SCHEMA publication. + let publication_name = "test_schema_pub_restart".to_string(); + database + .create_publication_for_all(&publication_name, Some(&table_name.schema)) + .await + .unwrap(); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + // Start pipeline and wait for initial sync. 
+ let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name.clone(), + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + parent_sync_done.notified().await; + + // Verify initial state. + let table_states_before = state_store.get_table_replication_states().await; + assert!( + table_states_before.contains_key(&parent_table_id), + "Parent table should be tracked before detachment" + ); + assert!( + !table_states_before.contains_key(&p1_table_id), + "Child partition p1 should NOT be tracked separately before detachment" + ); + + // Detach partition p1. + let child_p1_name = format!("{}_{}", table_name.name, "p1"); + let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + database + .run_sql(&format!( + "alter table {} detach partition {}", + table_name.as_quoted_identifier(), + child_p1_qualified + )) + .await + .unwrap(); + + // Insert into detached partition (while pipeline is still running). + database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('detached_event', 25)", + child_p1_qualified + )) + .await + .unwrap(); + + // Shutdown the pipeline. + let _ = pipeline.shutdown_and_wait().await; + + // Restart the pipeline. It should now discover the detached partition as a new table. + let detached_sync_done = state_store + .notify_on_table_state_type(p1_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name.clone(), + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + + // Wait for the detached partition to be synced. + detached_sync_done.notified().await; + + let _ = pipeline.shutdown_and_wait().await; + + // Verify the detached partition was discovered and synced. + let table_states_after = state_store.get_table_replication_states().await; + assert!( + table_states_after.contains_key(&p1_table_id), + "Detached partition should be discovered as a standalone table after restart" + ); + + // Verify the data from the detached partition was copied. + let table_rows = destination.get_table_rows().await; + let parent_rows: usize = table_rows + .get(&parent_table_id) + .map(|rows| rows.len()) + .unwrap_or(0); + assert_eq!( + parent_rows, 2, + "Parent table should have the initial 2 rows from first pipeline run" + ); + let detached_rows: usize = table_rows + .get(&p1_table_id) + .map(|rows| rows.len()) + .unwrap_or(0); + assert_eq!( + detached_rows, 2, + "Detached partition should have 2 rows synced after pipeline restart (1 from initial data + 1 inserted)" + ); +} From bbfdf95931f5c5ba99149eef17a0791fdcc74427 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 13:28:20 +0200 Subject: [PATCH 12/26] Improve --- etl/tests/pipeline_with_partitioned_table.rs | 96 -------------------- 1 file changed, 96 deletions(-) diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index d1ab3c5f0..9935d8853 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -723,102 +723,6 @@ async fn partition_detach_with_all_tables_publication_does_replicate_detached_in ); } -/// Tests that partitioned tables replicate correctly with FOR TABLES IN SCHEMA publication. 
-/// The parent table in the schema should be tracked and all data should be replicated. -/// Requires PostgreSQL 15+ for FOR TABLES IN SCHEMA support. -#[tokio::test(flavor = "multi_thread")] -async fn partitioned_table_with_schema_publication_replicates_data() { - init_test_tracing(); - let database = spawn_source_database().await; - - // Skip test if PostgreSQL version is < 15 (FOR TABLES IN SCHEMA requires 15+). - if let Some(version) = database.server_version() { - if version.get() < 150000 { - eprintln!("Skipping test: PostgreSQL 15+ required for FOR TABLES IN SCHEMA"); - return; - } - } - - let table_name = test_table_name("partitioned_events_schema"); - let partition_specs = [ - ("p1", "from (1) to (100)"), - ("p2", "from (100) to (200)"), - ("p3", "from (200) to (300)"), - ]; - - let (parent_table_id, _partition_table_ids) = - create_partitioned_table(&database, table_name.clone(), &partition_specs) - .await - .expect("Failed to create partitioned table"); - - database - .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150), ('event3', 250)", - table_name.as_quoted_identifier() - )) - .await - .unwrap(); - - // Create FOR TABLES IN SCHEMA publication. - let publication_name = "test_schema_pub".to_string(); - database - .create_publication_for_all(&publication_name, Some(&table_name.schema)) - .await - .expect("Failed to create publication"); - - let state_store = NotifyingStore::new(); - let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); - - let parent_sync_done = state_store - .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) - .await; - - let pipeline_id: PipelineId = random(); - let mut pipeline = create_pipeline( - &database.config, - pipeline_id, - publication_name, - state_store.clone(), - destination.clone(), - ); - - pipeline.start().await.unwrap(); - parent_sync_done.notified().await; - - let _ = pipeline.shutdown_and_wait().await; - - let table_rows = destination.get_table_rows().await; - let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum(); - - assert_eq!( - total_rows, 3, - "Expected 3 rows synced from schema publication, but got {total_rows}" - ); - - let table_states = state_store.get_table_replication_states().await; - - assert!( - table_states.contains_key(&parent_table_id), - "Parent table should be tracked in state" - ); - assert_eq!( - table_states.len(), - 1, - "Only the parent table should be tracked in state" - ); - - let parent_table_rows = table_rows - .iter() - .filter(|(table_id, _)| **table_id == parent_table_id) - .map(|(_, rows)| rows.len()) - .sum::(); - assert_eq!( - parent_table_rows, 3, - "Parent table should contain all rows from schema publication" - ); -} - /// Tests that detached partitions are not automatically discovered with FOR TABLES IN SCHEMA publication. /// Similar to FOR ALL TABLES, the detached partition appears in pg_publication_tables but is not /// automatically discovered by the running pipeline without restart. 
From 65f0b52b13ba0c434cf2e56ffeec8edba1a28bb2 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 13:31:54 +0200 Subject: [PATCH 13/26] Improve --- etl-postgres/src/tokio/test_utils.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/etl-postgres/src/tokio/test_utils.rs b/etl-postgres/src/tokio/test_utils.rs index 8a0d52601..d452c3564 100644 --- a/etl-postgres/src/tokio/test_utils.rs +++ b/etl-postgres/src/tokio/test_utils.rs @@ -89,7 +89,9 @@ impl PgDatabase { Some(schema_name) => format!( "create publication {publication_name} for tables in schema {schema_name} with (publish_via_partition_root = true)" ), - None => format!("create publication {publication_name} for all tables with (publish_via_partition_root = true)"), + None => format!( + "create publication {publication_name} for all tables with (publish_via_partition_root = true)" + ), }; client.execute(&create_publication_query, &[]).await?; @@ -115,8 +117,9 @@ impl PgDatabase { } } None => { - let create_publication_query = - format!("create publication {publication_name} for all tables with (publish_via_partition_root = true)"); + let create_publication_query = format!( + "create publication {publication_name} for all tables with (publish_via_partition_root = true)" + ); client.execute(&create_publication_query, &[]).await?; } } From 12416f44e5012145ab4828913247a1e4cdcc80c6 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 13:37:00 +0200 Subject: [PATCH 14/26] Improve --- etl/tests/pipeline_with_partitioned_table.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 9935d8853..3b9ec3313 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -386,8 +386,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ // Insert into the detached partition (should NOT be replicated). database .run_sql(&format!( - "insert into {} (data, partition_key) values ('detached_event', 25)", - child_p1_qualified + "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)" )) .await .unwrap(); @@ -551,8 +550,7 @@ async fn partition_detach_with_all_tables_publication_does_not_replicate_detache // Insert into detached partition. database .run_sql(&format!( - "insert into {} (data, partition_key) values ('detached_event', 25)", - child_p1_qualified + "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)" )) .await .unwrap(); @@ -667,8 +665,7 @@ async fn partition_detach_with_all_tables_publication_does_replicate_detached_in // Insert into detached partition (while pipeline is stopped). database .run_sql(&format!( - "insert into {} (data, partition_key) values ('detached_event', 25)", - child_p1_qualified + "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)" )) .await .unwrap(); @@ -829,8 +826,7 @@ async fn partition_detach_with_schema_publication_does_not_replicate_detached_in // Insert into detached partition. 
database .run_sql(&format!( - "insert into {} (data, partition_key) values ('detached_event', 25)", - child_p1_qualified + "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)" )) .await .unwrap(); @@ -975,8 +971,7 @@ async fn partition_detach_with_schema_publication_does_replicate_detached_insert // Insert into detached partition (while pipeline is still running). database .run_sql(&format!( - "insert into {} (data, partition_key) values ('detached_event', 25)", - child_p1_qualified + "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)" )) .await .unwrap(); From 2127af1f318ff03a6a51893fdccb0d5be6bfebd4 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 15:48:32 +0200 Subject: [PATCH 15/26] Improve --- etl/src/test_utils/event.rs | 19 +- etl/tests/pipeline_with_partitioned_table.rs | 181 +++++++++++++++++-- 2 files changed, 181 insertions(+), 19 deletions(-) diff --git a/etl/src/test_utils/event.rs b/etl/src/test_utils/event.rs index d05615f5f..b30361877 100644 --- a/etl/src/test_utils/event.rs +++ b/etl/src/test_utils/event.rs @@ -22,15 +22,20 @@ pub fn group_events_by_type_and_table_id( for event in events { let event_type = EventType::from(event); // This grouping only works on simple DML operations. - let table_id = match event { - Event::Insert(event) => Some(event.table_id), - Event::Update(event) => Some(event.table_id), - Event::Delete(event) => Some(event.table_id), - _ => None, + let table_ids = match event { + Event::Insert(event) => vec![event.table_id], + Event::Update(event) => vec![event.table_id], + Event::Delete(event) => vec![event.table_id], + Event::Truncate(event) => event + .rel_ids + .iter() + .map(|rel_id| TableId::new(*rel_id)) + .collect(), + _ => vec![], }; - if let Some(table_id) = table_id { + for table_id in table_ids { grouped - .entry((event_type, table_id)) + .entry((event_type.clone(), table_id)) .or_insert_with(Vec::new) .push(event.clone()); } diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 3b9ec3313..5974b489c 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -163,6 +163,11 @@ async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { .await .unwrap(); + // Wait for CDC to deliver the new row. + let inserts_notify = destination + .wait_for_events_count(vec![(EventType::Insert, 1)]) + .await; + database .run_sql(&format!( "insert into {} (data, partition_key) values ('event3', 250)", @@ -171,10 +176,6 @@ async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { .await .unwrap(); - // Wait for CDC to deliver the new row. - let inserts_notify = destination - .wait_for_events_count(vec![(EventType::Insert, 1)]) - .await; inserts_notify.notified().await; let _ = pipeline.shutdown_and_wait().await; @@ -305,6 +306,160 @@ async fn partition_drop_does_not_emit_delete_or_truncate() { ); } +/// Tests that issuing a TRUNCATE at the parent table level does emit a TRUNCATE event in the +/// replication stream. 
+#[tokio::test(flavor = "multi_thread")] +async fn parent_table_truncate_does_emit_truncate_event() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("partitioned_events_truncate"); + let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; + + let (parent_table_id, _partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + let publication_name = "test_partitioned_pub_truncate".to_string(); + database + .create_publication(&publication_name, std::slice::from_ref(&table_name)) + .await + .expect("Failed to create publication"); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name, + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + + parent_sync_done.notified().await; + + // Wait for the parent table truncate to be replicated. + let truncate_notify = destination + .wait_for_events_count(vec![(EventType::Truncate, 1)]) + .await; + + // We truncate the parent table. + database + .run_sql(&format!( + "truncate table {}", + table_name.as_quoted_identifier(), + )) + .await + .unwrap(); + + truncate_notify.notified().await; + + let _ = pipeline.shutdown_and_wait().await; + + let events = destination.get_events().await; + let grouped_events = group_events_by_type_and_table_id(&events); + let truncate_events = grouped_events + .get(&(EventType::Truncate, parent_table_id)) + .map(|v| v.len()) + .unwrap_or(0); + + assert_eq!( + truncate_events, 1, + "Truncate event should be emitted for the parent table" + ); +} + +/// Tests that issuing a TRUNCATE at the child table level does NOT emit a TRUNCATE event in the +/// replication stream. 
+#[tokio::test(flavor = "multi_thread")] +async fn child_table_truncate_does_not_emit_truncate_event() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("partitioned_events_truncate"); + let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; + + let (parent_table_id, _partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + let publication_name = "test_partitioned_pub_truncate".to_string(); + database + .create_publication(&publication_name, std::slice::from_ref(&table_name)) + .await + .expect("Failed to create publication"); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name, + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + + parent_sync_done.notified().await; + + // We truncate the child table. + let child_p1_name = format!("{}_{}", table_name.name, "p1"); + let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + database + .run_sql(&format!("truncate table {child_p1_qualified}",)) + .await + .unwrap(); + + let _ = pipeline.shutdown_and_wait().await; + + let events = destination.get_events().await; + let grouped_events = group_events_by_type_and_table_id(&events); + let truncate_events = grouped_events + .get(&(EventType::Truncate, parent_table_id)) + .map(|v| v.len()) + .unwrap_or(0); + + assert_eq!( + truncate_events, 0, + "Truncate event should be not emitted for the child table" + ); +} + /// Tests that detached partitions are not replicated with explicit publications. /// Once detached, the partition becomes independent and is not in the publication since /// only the parent table was explicitly added. Inserts to detached partitions are not replicated. @@ -391,6 +546,11 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ .await .unwrap(); + // Wait for the parent table insert to be replicated. + let inserts_notify = destination + .wait_for_events_count(vec![(EventType::Insert, 1)]) + .await; + // Insert into the parent table (should be replicated to remaining partition p2). database .run_sql(&format!( @@ -400,10 +560,6 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ .await .unwrap(); - // Wait for the parent table insert to be replicated. - let inserts_notify = destination - .wait_for_events_count(vec![(EventType::Insert, 1)]) - .await; inserts_notify.notified().await; let _ = pipeline.shutdown_and_wait().await; @@ -831,6 +987,11 @@ async fn partition_detach_with_schema_publication_does_not_replicate_detached_in .await .unwrap(); + // Wait for the parent table insert to be replicated. + let inserts_notify = destination + .wait_for_events_count(vec![(EventType::Insert, 1)]) + .await; + // Insert into parent table (should be replicated). 
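A note on why the parent-table insert below is expected to replicate: the row is physically routed into the remaining partition p2, but because the publication was created with publish_via_partition_root = true, the decoded Insert arrives tagged with the parent's OID. In SQL terms (table name from this test, schema qualification omitted):

```sql
-- Stored in partition p2, yet published under the parent relation.
insert into partitioned_events_schema_detach (data, partition_key)
values ('parent_event', 125);
```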
database .run_sql(&format!( @@ -840,10 +1001,6 @@ async fn partition_detach_with_schema_publication_does_not_replicate_detached_in .await .unwrap(); - // Wait for the parent table insert to be replicated. - let inserts_notify = destination - .wait_for_events_count(vec![(EventType::Insert, 1)]) - .await; inserts_notify.notified().await; let _ = pipeline.shutdown_and_wait().await; From a51a8d108ed54e1d9a4e63b95b1a1055cde97a71 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 16:31:15 +0200 Subject: [PATCH 16/26] Improve --- etl-postgres/src/tokio/test_utils.rs | 25 ++++-- etl/tests/pipeline_with_partitioned_table.rs | 91 ++++++++++++++++++++ 2 files changed, 110 insertions(+), 6 deletions(-) diff --git a/etl-postgres/src/tokio/test_utils.rs b/etl-postgres/src/tokio/test_utils.rs index d452c3564..5c6b76123 100644 --- a/etl-postgres/src/tokio/test_utils.rs +++ b/etl-postgres/src/tokio/test_utils.rs @@ -46,14 +46,16 @@ impl PgDatabase { self.server_version } - /// Creates a Postgres publication for the specified tables. + /// Creates a Postgres publication for the specified tables with an optional configuration + /// parameter. /// - /// Sets up logical replication by creating a publication that includes - /// the given tables for change data capture. - pub async fn create_publication( + /// This method is used for specific cases which should mutate the defaults when creating a + /// publication which is done only for a small subset of tests. + pub async fn create_publication_with_config( &self, publication_name: &str, table_names: &[TableName], + publish_via_partition_root: bool, ) -> Result<(), tokio_postgres::Error> { let table_names = table_names .iter() @@ -61,9 +63,10 @@ impl PgDatabase { .collect::>(); let create_publication_query = format!( - "create publication {} for table {} with (publish_via_partition_root = true)", + "create publication {} for table {} with (publish_via_partition_root = {})", publication_name, - table_names.join(", ") + table_names.join(", "), + publish_via_partition_root ); self.client .as_ref() @@ -74,6 +77,16 @@ impl PgDatabase { Ok(()) } + /// Creates a Postgres publication for the specified tables. + pub async fn create_publication( + &self, + publication_name: &str, + table_names: &[TableName], + ) -> Result<(), tokio_postgres::Error> { + self.create_publication_with_config(publication_name, table_names, true) + .await + } + pub async fn create_publication_for_all( &self, publication_name: &str, diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 5974b489c..7d8d453ba 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -1182,3 +1182,94 @@ async fn partition_detach_with_schema_publication_does_replicate_detached_insert "Detached partition should have 2 rows synced after pipeline restart (1 from initial data + 1 inserted)" ); } + +/// Tests that the system gracefully stops in case `publish_via_partition_root` is set to `false` +/// which is currently not supported. 
+#[tokio::test(flavor = "multi_thread")]
+async fn partitioned_table_with_publish_via_root_false() {
+    init_test_tracing();
+    let database = spawn_source_database().await;
+
+    let table_name = test_table_name("partitioned_events");
+    let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")];
+
+    let (parent_table_id, _partition_table_ids) =
+        create_partitioned_table(&database, table_name.clone(), &partition_specs)
+            .await
+            .expect("Failed to create partitioned table");
+
+    database
+        .run_sql(&format!(
+            "insert into {} (data, partition_key) values
+            ('event1', 50), ('event2', 150)",
+            table_name.as_quoted_identifier()
+        ))
+        .await
+        .unwrap();
+
+    let publication_name = "test_partitioned_pub".to_string();
+    database
+        .create_publication_with_config(&publication_name, std::slice::from_ref(&table_name), false)
+        .await
+        .expect("Failed to create publication");
+
+    let state_store = NotifyingStore::new();
+    let destination = TestDestinationWrapper::wrap(MemoryDestination::new());
+
+    let pipeline_id: PipelineId = random();
+    let mut pipeline = create_pipeline(
+        &database.config,
+        pipeline_id,
+        publication_name,
+        state_store.clone(),
+        destination.clone(),
+    );
+
+    // Register interest in the parent table reaching sync done.
+    let parent_sync_done = state_store
+        .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone)
+        .await;
+
+    pipeline.start().await.unwrap();
+
+    // Wait on the sync done of the parent.
+    parent_sync_done.notified().await;
+
+    // Wait for the COMMIT event of the insert in the parent table. COMMIT events are always
+    // processed unconditionally because they don't contain relation-specific information.
+    //
+    // We use the COMMIT event to verify transaction processing: we can check whether the
+    // transaction's component events were captured. In this case, they should NOT be present
+    // because when `publish_via_partition_root` is `false`, events are tagged with child
+    // table OIDs. Since these child table OIDs are unknown to us (we always resolve against
+    // the parent table OID), those events are skipped.
+    let commit = destination
+        .wait_for_events_count(vec![(EventType::Commit, 1)])
+        .await;
+
+    database
+        .run_sql(&format!(
+            "insert into {} (data, partition_key) values \
+            ('event1', 50)",
+            table_name.as_quoted_identifier()
+        ))
+        .await
+        .unwrap();
+
+    commit.notified().await;
+
+    pipeline.shutdown_and_wait().await.unwrap();
+
+    // No inserts should be captured for the reasons explained above.
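The child-OID tagging described in the comment block above follows from how the publication expands when the option is off: with publish_via_partition_root = false, pg_publication_tables lists the leaf partitions instead of the root, so decoded changes reference partition OIDs. A sketch with illustrative names:

```sql
create publication pub_leaves for table partitioned_events
    with (publish_via_partition_root = false);

-- Expands to the leaves rather than the root:
select tablename from pg_publication_tables where pubname = 'pub_leaves';
-- => partitioned_events_p1
-- => partitioned_events_p2
```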
+ let events = destination.get_events().await; + let grouped_events = group_events_by_type_and_table_id(&events); + let p1_inserts = grouped_events + .get(&(EventType::Insert, parent_table_id)) + .cloned() + .unwrap_or_default(); + assert_eq!( + p1_inserts.len(), + 0, + "Inserts in partition 'p1' should be skipped because `publish_via_partition_root` is `false`" + ); +} From c87820f2dd514d61a51b0692dc30034ad85e7a8c Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 16:47:27 +0200 Subject: [PATCH 17/26] Improve --- etl/tests/pipeline_with_partitioned_table.rs | 336 ++++++------------- 1 file changed, 108 insertions(+), 228 deletions(-) diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 7d8d453ba..f049a9840 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -30,12 +30,11 @@ async fn partitioned_table_copy_replicates_existing_data() { let (parent_table_id, _partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); database .run_sql(&format!( - "insert into {} (data, partition_key) values - ('event1', 50), ('event2', 150), ('event3', 250)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150), ('event3', 250)", table_name.as_quoted_identifier() )) .await @@ -45,7 +44,7 @@ async fn partitioned_table_copy_replicates_existing_data() { database .create_publication(&publication_name, std::slice::from_ref(&table_name)) .await - .expect("Failed to create publication"); + .unwrap(); let state_store = NotifyingStore::new(); let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); @@ -73,32 +72,19 @@ async fn partitioned_table_copy_replicates_existing_data() { let table_rows = destination.get_table_rows().await; let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum(); - assert_eq!( - total_rows, 3, - "Expected 3 rows synced (one per partition), but got {total_rows}" - ); + assert_eq!(total_rows, 3); let table_states = state_store.get_table_replication_states().await; - assert!( - table_states.contains_key(&parent_table_id), - "Parent table should be tracked in state" - ); - assert_eq!( - table_states.len(), - 1, - "Only the parent table should be tracked in state" - ); + assert!(table_states.contains_key(&parent_table_id)); + assert_eq!(table_states.len(), 1); let parent_table_rows = table_rows .iter() .filter(|(table_id, _)| **table_id == parent_table_id) .map(|(_, rows)| rows.len()) .sum::(); - assert_eq!( - parent_table_rows, 3, - "Parent table should contain all rows when publishing via root" - ); + assert_eq!(parent_table_rows, 3); } /// Tests that CDC streams inserts to partitions created after pipeline startup. 
@@ -114,12 +100,11 @@ async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { let (parent_table_id, _initial_partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &initial_partition_specs) .await - .expect("Failed to create initial partitioned table"); + .unwrap(); database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -129,7 +114,7 @@ async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { database .create_publication(&publication_name, std::slice::from_ref(&table_name)) .await - .expect("Failed to create publication"); + .unwrap(); let state_store = NotifyingStore::new(); let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); @@ -182,10 +167,7 @@ async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { let table_rows = destination.get_table_rows().await; let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum(); - assert_eq!( - total_rows, 2, - "Expected 2 rows synced from initial copy, got {total_rows}" - ); + assert_eq!(total_rows, 2); let table_states = state_store.get_table_replication_states().await; assert!(table_states.contains_key(&parent_table_id)); @@ -220,12 +202,11 @@ async fn partition_drop_does_not_emit_delete_or_truncate() { let (parent_table_id, _partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -235,7 +216,7 @@ async fn partition_drop_does_not_emit_delete_or_truncate() { database .create_publication(&publication_name, std::slice::from_ref(&table_name)) .await - .expect("Failed to create publication"); + .unwrap(); let state_store = NotifyingStore::new(); let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); @@ -258,28 +239,28 @@ async fn partition_drop_does_not_emit_delete_or_truncate() { let events_before = destination.get_events().await; let grouped_before = group_events_by_type_and_table_id(&events_before); - let del_before = grouped_before + let delete_count_before = grouped_before .get(&(EventType::Delete, parent_table_id)) .map(|v| v.len()) .unwrap_or(0); - let trunc_before = grouped_before + let truncate_count_before = grouped_before .get(&(EventType::Truncate, parent_table_id)) .map(|v| v.len()) .unwrap_or(0); // Detach and drop one child partition (DDL should not generate DML events). 
- let child_p1_name = format!("{}_{}", table_name.name, "p1"); - let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + let partition_p1_name = format!("{}_{}", table_name.name, "p1"); + let partition_p1_qualified = format!("{}.{}", table_name.schema, partition_p1_name); database .run_sql(&format!( "alter table {} detach partition {}", table_name.as_quoted_identifier(), - child_p1_qualified + partition_p1_qualified )) .await .unwrap(); database - .run_sql(&format!("drop table {child_p1_qualified}")) + .run_sql(&format!("drop table {partition_p1_qualified}")) .await .unwrap(); @@ -287,23 +268,17 @@ async fn partition_drop_does_not_emit_delete_or_truncate() { let events_after = destination.get_events().await; let grouped_after = group_events_by_type_and_table_id(&events_after); - let del_after = grouped_after + let delete_count_after = grouped_after .get(&(EventType::Delete, parent_table_id)) .map(|v| v.len()) .unwrap_or(0); - let trunc_after = grouped_after + let truncate_count_after = grouped_after .get(&(EventType::Truncate, parent_table_id)) .map(|v| v.len()) .unwrap_or(0); - assert_eq!( - del_after, del_before, - "Partition drop must not emit DELETE events" - ); - assert_eq!( - trunc_after, trunc_before, - "Partition drop must not emit TRUNCATE events" - ); + assert_eq!(delete_count_after, delete_count_before); + assert_eq!(truncate_count_after, truncate_count_before); } /// Tests that issuing a TRUNCATE at the parent table level does emit a TRUNCATE event in the @@ -319,12 +294,11 @@ async fn parent_table_truncate_does_emit_truncate_event() { let (parent_table_id, _partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -334,7 +308,7 @@ async fn parent_table_truncate_does_emit_truncate_event() { database .create_publication(&publication_name, std::slice::from_ref(&table_name)) .await - .expect("Failed to create publication"); + .unwrap(); let state_store = NotifyingStore::new(); let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); @@ -376,15 +350,12 @@ async fn parent_table_truncate_does_emit_truncate_event() { let events = destination.get_events().await; let grouped_events = group_events_by_type_and_table_id(&events); - let truncate_events = grouped_events + let truncate_count = grouped_events .get(&(EventType::Truncate, parent_table_id)) .map(|v| v.len()) .unwrap_or(0); - assert_eq!( - truncate_events, 1, - "Truncate event should be emitted for the parent table" - ); + assert_eq!(truncate_count, 1); } /// Tests that issuing a TRUNCATE at the child table level does NOT emit a TRUNCATE event in the @@ -400,12 +371,11 @@ async fn child_table_truncate_does_not_emit_truncate_event() { let (parent_table_id, _partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -415,7 +385,7 @@ async fn child_table_truncate_does_not_emit_truncate_event() { database 
.create_publication(&publication_name, std::slice::from_ref(&table_name)) .await - .expect("Failed to create publication"); + .unwrap(); let state_store = NotifyingStore::new(); let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); @@ -438,10 +408,10 @@ async fn child_table_truncate_does_not_emit_truncate_event() { parent_sync_done.notified().await; // We truncate the child table. - let child_p1_name = format!("{}_{}", table_name.name, "p1"); - let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + let partition_p1_name = format!("{}_{}", table_name.name, "p1"); + let partition_p1_qualified = format!("{}.{}", table_name.schema, partition_p1_name); database - .run_sql(&format!("truncate table {child_p1_qualified}",)) + .run_sql(&format!("truncate table {partition_p1_qualified}")) .await .unwrap(); @@ -449,15 +419,12 @@ async fn child_table_truncate_does_not_emit_truncate_event() { let events = destination.get_events().await; let grouped_events = group_events_by_type_and_table_id(&events); - let truncate_events = grouped_events + let truncate_count = grouped_events .get(&(EventType::Truncate, parent_table_id)) .map(|v| v.len()) .unwrap_or(0); - assert_eq!( - truncate_events, 0, - "Truncate event should be not emitted for the child table" - ); + assert_eq!(truncate_count, 0); } /// Tests that detached partitions are not replicated with explicit publications. @@ -474,15 +441,14 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ let (parent_table_id, partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); let p1_table_id = partition_table_ids[0]; // Insert initial data into both partitions. database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -493,7 +459,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ database .create_publication(&publication_name, std::slice::from_ref(&table_name)) .await - .expect("Failed to create publication"); + .unwrap(); let state_store = NotifyingStore::new(); let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); @@ -521,19 +487,16 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ .get(&parent_table_id) .map(|rows| rows.len()) .unwrap_or(0); - assert_eq!( - parent_rows, 2, - "Parent table should have 2 rows from initial COPY" - ); + assert_eq!(parent_rows, 2); // Detach partition p1 from parent. - let child_p1_name = format!("{}_{}", table_name.name, "p1"); - let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + let partition_p1_name = format!("{}_{}", table_name.name, "p1"); + let partition_p1_qualified = format!("{}.{}", table_name.schema, partition_p1_name); database .run_sql(&format!( "alter table {} detach partition {}", table_name.as_quoted_identifier(), - child_p1_qualified + partition_p1_qualified )) .await .unwrap(); @@ -541,7 +504,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ // Insert into the detached partition (should NOT be replicated). 
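    // Once detached, the partition is a standalone table, so the explicit
    // publication created for the parent no longer covers it and the insert
    // below never reaches the replication stream.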
database .run_sql(&format!( - "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)" + "insert into {partition_p1_qualified} (data, partition_key) values ('detached_event', 25)" )) .await .unwrap(); @@ -573,22 +536,14 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ .get(&(EventType::Insert, parent_table_id)) .cloned() .unwrap_or_default(); - assert_eq!( - parent_inserts.len(), - 1, - "Parent table should have exactly 1 CDC insert event" - ); + assert_eq!(parent_inserts.len(), 1); // Detached partition should have NO insert events. let detached_inserts = grouped .get(&(EventType::Insert, p1_table_id)) .cloned() .unwrap_or_default(); - assert_eq!( - detached_inserts.len(), - 0, - "Detached partition inserts should NOT be replicated" - ); + assert_eq!(detached_inserts.len(), 0); } /// Tests catalog state when a partition is detached with FOR ALL TABLES publication. @@ -605,15 +560,14 @@ async fn partition_detach_with_all_tables_publication_does_not_replicate_detache let (parent_table_id, partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); let p1_table_id = partition_table_ids[0]; // Insert initial data. database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -647,23 +601,17 @@ async fn partition_detach_with_all_tables_publication_does_not_replicate_detache // Verify the initial state. The parent table is the only table tracked. let table_states_before = state_store.get_table_replication_states().await; - assert!( - table_states_before.contains_key(&parent_table_id), - "Parent table should be tracked before detachment" - ); - assert!( - !table_states_before.contains_key(&p1_table_id), - "Child partition p1 should NOT be tracked separately before detachment" - ); + assert!(table_states_before.contains_key(&parent_table_id)); + assert!(!table_states_before.contains_key(&p1_table_id)); // Detach partition p1. - let child_p1_name = format!("{}_{}", table_name.name, "p1"); - let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + let partition_p1_name = format!("{}_{}", table_name.name, "p1"); + let partition_p1_qualified = format!("{}.{}", table_name.schema, partition_p1_name); database .run_sql(&format!( "alter table {} detach partition {}", table_name.as_quoted_identifier(), - child_p1_qualified + partition_p1_qualified )) .await .unwrap(); @@ -680,10 +628,7 @@ async fn partition_detach_with_all_tables_publication_does_not_replicate_detache .await .unwrap(); let inherits_count: i64 = inherits_check[0].get("cnt"); - assert_eq!( - inherits_count, 0, - "Detached partition should have no parent in pg_inherits" - ); + assert_eq!(inherits_count, 0); // Check pg_publication_tables. With FOR ALL TABLES, the detached partition should appear. 
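    // (Detachment turns the partition into an ordinary standalone table, which a
    // FOR ALL TABLES publication picks up immediately; the running pipeline still
    // only knows about the tables it discovered at startup.)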
let pub_tables_check = database @@ -693,20 +638,17 @@ async fn partition_detach_with_all_tables_publication_does_not_replicate_detache .query( "select count(*) as cnt from pg_publication_tables where pubname = $1 and tablename = $2", - &[&publication_name, &child_p1_name], + &[&publication_name, &partition_p1_name], ) .await .unwrap(); let pub_tables_count: i64 = pub_tables_check[0].get("cnt"); - assert_eq!( - pub_tables_count, 1, - "Detached partition should appear in pg_publication_tables for ALL TABLES publication" - ); + assert_eq!(pub_tables_count, 1); // Insert into detached partition. database .run_sql(&format!( - "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)" + "insert into {partition_p1_qualified} (data, partition_key) values ('detached_event', 25)" )) .await .unwrap(); @@ -720,10 +662,7 @@ async fn partition_detach_with_all_tables_publication_does_not_replicate_detache // The pipeline state should still only track the parent table (not the detached partition) // because it hasn't re-scanned for new tables. let table_states_after = state_store.get_table_replication_states().await; - assert!( - table_states_after.contains_key(&parent_table_id), - "Parent table should still be tracked after detachment" - ); + assert!(table_states_after.contains_key(&parent_table_id)); // The detached partition insert should NOT be replicated in this pipeline run // because the pipeline hasn't discovered it as a new table. @@ -733,11 +672,7 @@ async fn partition_detach_with_all_tables_publication_does_not_replicate_detache .get(&(EventType::Insert, p1_table_id)) .cloned() .unwrap_or_default(); - assert_eq!( - detached_inserts.len(), - 0, - "Detached partition inserts should NOT be replicated without table re-discovery" - ); + assert_eq!(detached_inserts.len(), 0); } /// Tests that a detached partition is discovered as a new table after pipeline restart. @@ -754,15 +689,14 @@ async fn partition_detach_with_all_tables_publication_does_replicate_detached_in let (parent_table_id, partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); let p1_table_id = partition_table_ids[0]; // Insert initial data. database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -797,23 +731,17 @@ async fn partition_detach_with_all_tables_publication_does_replicate_detached_in // Verify the initial state. The parent table is the only table tracked. let table_states_before = state_store.get_table_replication_states().await; - assert!( - table_states_before.contains_key(&parent_table_id), - "Parent table should be tracked before detachment" - ); - assert!( - !table_states_before.contains_key(&p1_table_id), - "Child partition p1 should NOT be tracked separately before detachment" - ); + assert!(table_states_before.contains_key(&parent_table_id)); + assert!(!table_states_before.contains_key(&p1_table_id)); // Detach partition p1. 
- let child_p1_name = format!("{}_{}", table_name.name, "p1"); - let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + let partition_p1_name = format!("{}_{}", table_name.name, "p1"); + let partition_p1_qualified = format!("{}.{}", table_name.schema, partition_p1_name); database .run_sql(&format!( "alter table {} detach partition {}", table_name.as_quoted_identifier(), - child_p1_qualified + partition_p1_qualified )) .await .unwrap(); @@ -821,7 +749,7 @@ async fn partition_detach_with_all_tables_publication_does_replicate_detached_in // Insert into detached partition (while pipeline is stopped). database .run_sql(&format!( - "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)" + "insert into {partition_p1_qualified} (data, partition_key) values ('detached_event', 25)" )) .await .unwrap(); @@ -851,29 +779,20 @@ async fn partition_detach_with_all_tables_publication_does_replicate_detached_in // Verify the detached partition was discovered and synced. let table_states_after = state_store.get_table_replication_states().await; - assert!( - table_states_after.contains_key(&p1_table_id), - "Detached partition should be discovered as a standalone table after restart" - ); + assert!(table_states_after.contains_key(&p1_table_id)); // Verify the data from the detached partition was copied. let table_rows = destination.get_table_rows().await; let parent_rows: usize = table_rows - .get(&p1_table_id) + .get(&parent_table_id) .map(|rows| rows.len()) .unwrap_or(0); - assert_eq!( - parent_rows, 2, - "The parent table should have the initial rows" - ); + assert_eq!(parent_rows, 2); let detached_rows: usize = table_rows .get(&p1_table_id) .map(|rows| rows.len()) .unwrap_or(0); - assert_eq!( - detached_rows, 2, - "Detached partition should have rows synced after pipeline restart" - ); + assert_eq!(detached_rows, 2); } /// Tests that detached partitions are not automatically discovered with FOR TABLES IN SCHEMA publication. @@ -899,14 +818,13 @@ async fn partition_detach_with_schema_publication_does_not_replicate_detached_in let (parent_table_id, partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); let p1_table_id = partition_table_ids[0]; database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -940,23 +858,17 @@ async fn partition_detach_with_schema_publication_does_not_replicate_detached_in // Verify initial state. let table_states_before = state_store.get_table_replication_states().await; - assert!( - table_states_before.contains_key(&parent_table_id), - "Parent table should be tracked before detachment" - ); - assert!( - !table_states_before.contains_key(&p1_table_id), - "Child partition p1 should NOT be tracked separately before detachment" - ); + assert!(table_states_before.contains_key(&parent_table_id)); + assert!(!table_states_before.contains_key(&p1_table_id)); // Detach partition p1. 
- let child_p1_name = format!("{}_{}", table_name.name, "p1"); - let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + let partition_p1_name = format!("{}_{}", table_name.name, "p1"); + let partition_p1_qualified = format!("{}.{}", table_name.schema, partition_p1_name); database .run_sql(&format!( "alter table {} detach partition {}", table_name.as_quoted_identifier(), - child_p1_qualified + partition_p1_qualified )) .await .unwrap(); @@ -969,20 +881,17 @@ async fn partition_detach_with_schema_publication_does_not_replicate_detached_in .query( "select count(*) as cnt from pg_publication_tables where pubname = $1 and tablename = $2", - &[&publication_name, &child_p1_name], + &[&publication_name, &partition_p1_name], ) .await .unwrap(); let pub_tables_count: i64 = pub_tables_check[0].get("cnt"); - assert_eq!( - pub_tables_count, 1, - "Detached partition should appear in pg_publication_tables for TABLES IN SCHEMA publication" - ); + assert_eq!(pub_tables_count, 1); // Insert into detached partition. database .run_sql(&format!( - "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)" + "insert into {partition_p1_qualified} (data, partition_key) values ('detached_event', 25)" )) .await .unwrap(); @@ -1007,10 +916,7 @@ async fn partition_detach_with_schema_publication_does_not_replicate_detached_in // The pipeline state should still only track the parent table. let table_states_after = state_store.get_table_replication_states().await; - assert!( - table_states_after.contains_key(&parent_table_id), - "Parent table should still be tracked after detachment" - ); + assert!(table_states_after.contains_key(&parent_table_id)); // Verify events. let events = destination.get_events().await; @@ -1021,22 +927,14 @@ async fn partition_detach_with_schema_publication_does_not_replicate_detached_in .get(&(EventType::Insert, parent_table_id)) .cloned() .unwrap_or_default(); - assert_eq!( - parent_inserts.len(), - 1, - "Parent table should have exactly 1 CDC insert event" - ); + assert_eq!(parent_inserts.len(), 1); // Detached partition inserts should NOT be replicated without table re-discovery. let detached_inserts = grouped .get(&(EventType::Insert, p1_table_id)) .cloned() .unwrap_or_default(); - assert_eq!( - detached_inserts.len(), - 0, - "Detached partition inserts should NOT be replicated without table re-discovery" - ); + assert_eq!(detached_inserts.len(), 0); } /// Tests that a detached partition is discovered as a new table after pipeline restart @@ -1062,14 +960,13 @@ async fn partition_detach_with_schema_publication_does_replicate_detached_insert let (parent_table_id, partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); let p1_table_id = partition_table_ids[0]; database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -1104,23 +1001,17 @@ async fn partition_detach_with_schema_publication_does_replicate_detached_insert // Verify initial state. 
     let table_states_before = state_store.get_table_replication_states().await;
-    assert!(
-        table_states_before.contains_key(&parent_table_id),
-        "Parent table should be tracked before detachment"
-    );
-    assert!(
-        !table_states_before.contains_key(&p1_table_id),
-        "Child partition p1 should NOT be tracked separately before detachment"
-    );
+    assert!(table_states_before.contains_key(&parent_table_id));
+    assert!(!table_states_before.contains_key(&p1_table_id));
 
     // Detach partition p1.
-    let child_p1_name = format!("{}_{}", table_name.name, "p1");
-    let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name);
+    let partition_p1_name = format!("{}_{}", table_name.name, "p1");
+    let partition_p1_qualified = format!("{}.{}", table_name.schema, partition_p1_name);
     database
         .run_sql(&format!(
             "alter table {} detach partition {}",
             table_name.as_quoted_identifier(),
-            child_p1_qualified
+            partition_p1_qualified
         ))
         .await
         .unwrap();
@@ -1128,7 +1019,7 @@ async fn partition_detach_with_schema_publication_does_replicate_detached_insert
     // Insert into detached partition (while pipeline is still running).
     database
         .run_sql(&format!(
-            "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)"
+            "insert into {partition_p1_qualified} (data, partition_key) values ('detached_event', 25)"
         ))
         .await
         .unwrap();
@@ -1158,10 +1049,7 @@ async fn partition_detach_with_schema_publication_does_replicate_detached_insert
 
     // Verify the detached partition was discovered and synced.
     let table_states_after = state_store.get_table_replication_states().await;
-    assert!(
-        table_states_after.contains_key(&p1_table_id),
-        "Detached partition should be discovered as a standalone table after restart"
-    );
+    assert!(table_states_after.contains_key(&p1_table_id));
 
     // Verify the data from the detached partition was copied.
     let table_rows = destination.get_table_rows().await;
     let parent_rows: usize = table_rows
         .get(&parent_table_id)
         .map(|rows| rows.len())
         .unwrap_or(0);
-    assert_eq!(
-        parent_rows, 2,
-        "Parent table should have the initial 2 rows from first pipeline run"
-    );
+    assert_eq!(parent_rows, 2);
 
     let detached_rows: usize = table_rows
         .get(&p1_table_id)
         .map(|rows| rows.len())
         .unwrap_or(0);
-    assert_eq!(
-        detached_rows, 2,
-        "Detached partition should have 2 rows synced after pipeline restart (1 from initial data + 1 inserted)"
-    );
+    assert_eq!(detached_rows, 2);
 }
 
-/// Tests that the system gracefully stops in case `publish_via_partition_root` is set to `false`
-/// which is currently not supported.
+/// Tests that the system doesn't crash abruptly when `publish_via_partition_root` is set to `false`.
+///
+/// The current behavior is to silently skip replication, but we might want to refine this behavior
+/// and throw an error when we detect that there are partitioned tables in a publication and the setting
+/// is `false`. This way, we would be able to avoid forcing the user to always set `publish_via_partition_root=true`
+/// when it's unnecessary.
#[tokio::test(flavor = "multi_thread")] async fn partitioned_table_with_publish_via_root_false() { init_test_tracing(); @@ -1196,12 +1082,11 @@ async fn partitioned_table_with_publish_via_root_false() { let (parent_table_id, _partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); database .run_sql(&format!( - "insert into {} (data, partition_key) values - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -1211,7 +1096,7 @@ async fn partitioned_table_with_publish_via_root_false() { database .create_publication_with_config(&publication_name, std::slice::from_ref(&table_name), false) .await - .expect("Failed to create publication"); + .unwrap(); let state_store = NotifyingStore::new(); let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); @@ -1249,8 +1134,7 @@ async fn partitioned_table_with_publish_via_root_false() { database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50)", + "insert into {} (data, partition_key) values ('event1', 50)", table_name.as_quoted_identifier() )) .await @@ -1263,13 +1147,9 @@ async fn partitioned_table_with_publish_via_root_false() { // No inserts should be captured for the reasons explained above. let events = destination.get_events().await; let grouped_events = group_events_by_type_and_table_id(&events); - let p1_inserts = grouped_events + let parent_inserts = grouped_events .get(&(EventType::Insert, parent_table_id)) .cloned() .unwrap_or_default(); - assert_eq!( - p1_inserts.len(), - 0, - "Inserts in partition 'p1' should be skipped because `publish_via_partition_root` is `false`" - ); + assert!(parent_inserts.is_empty()); } From 5276bec0087358720616ee6ec545bdd70b0a28a2 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 17:12:17 +0200 Subject: [PATCH 18/26] Improve --- etl/tests/pipeline_with_partitioned_table.rs | 179 +++++++++++++++++++ 1 file changed, 179 insertions(+) diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index f049a9840..7d6a82e3f 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -10,6 +10,7 @@ use etl::test_utils::test_destination_wrapper::TestDestinationWrapper; use etl::test_utils::test_schema::create_partitioned_table; use etl::types::EventType; use etl::types::PipelineId; +use etl::types::TableId; use etl_telemetry::tracing::init_test_tracing; use rand::random; @@ -1065,6 +1066,184 @@ async fn partition_detach_with_schema_publication_does_replicate_detached_insert assert_eq!(detached_rows, 2); } +/// Tests that nested partitions (sub-partitioned tables) work correctly. +/// Creates a two-level partition hierarchy where one partition is itself partitioned, +/// and verifies that both initial COPY and CDC streaming work correctly. +/// Only the top-level parent table should be tracked in the pipeline state. +#[tokio::test(flavor = "multi_thread")] +async fn nested_partitioned_table_copy_and_cdc() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("nested_partitioned_events"); + + // Create the parent partitioned table (Level 1). + // Primary key must include all partitioning columns used at any level. 
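+    // (Postgres requires every unique constraint on a partitioned table, the
+    // primary key included, to contain all partition key columns, and the rule
+    // applies at each level of nesting, hence `sub_partition_key` in the key.)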
+ database + .run_sql(&format!( + "create table {} ( + id bigserial, + data text NOT NULL, + partition_key integer NOT NULL, + sub_partition_key integer NOT NULL, + primary key (id, partition_key, sub_partition_key) + ) partition by range (partition_key)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Get parent table ID. + let parent_row = database + .client + .as_ref() + .unwrap() + .query_one( + "select c.oid from pg_class c join pg_namespace n on n.oid = c.relnamespace + where n.nspname = $1 and c.relname = $2", + &[&table_name.schema, &table_name.name], + ) + .await + .unwrap(); + let parent_table_id: TableId = parent_row.get(0); + + // Create first partition (simple leaf partition) (Level 2a). + let p1_name = format!("{}_{}", table_name.name, "p1"); + let p1_qualified = format!("{}.{}", table_name.schema, p1_name); + database + .run_sql(&format!( + "create table {} partition of {} for values from (1) to (100)", + p1_qualified, + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Create second partition that is itself partitioned (Level 2b). + let p2_name = format!("{}_{}", table_name.name, "p2"); + let p2_qualified = format!("{}.{}", table_name.schema, p2_name); + database + .run_sql(&format!( + "create table {} partition of {} for values from (100) to (200) partition by range (sub_partition_key)", + p2_qualified, + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Create sub-partitions of p2 (Level 3). + let p2_sub1_name = format!("{}_{}", p2_name, "sub1"); + let p2_sub1_qualified = format!("{}.{}", table_name.schema, p2_sub1_name); + database + .run_sql(&format!( + "create table {} partition of {} for values from (1) to (50)", + p2_sub1_qualified, + p2_qualified + )) + .await + .unwrap(); + + let p2_sub2_name = format!("{}_{}", p2_name, "sub2"); + let p2_sub2_qualified = format!("{}.{}", table_name.schema, p2_sub2_name); + database + .run_sql(&format!( + "create table {} partition of {} for values from (50) to (100)", + p2_sub2_qualified, + p2_qualified + )) + .await + .unwrap(); + + // Insert initial data into different partitions: + // - event_p1 goes to the simple leaf partition p1 + // - event_p2_sub1 goes to nested partition p2 -> p2_sub1 + // - event_p2_sub2 goes to nested partition p2 -> p2_sub2 + database + .run_sql(&format!( + "insert into {} (data, partition_key, sub_partition_key) values + ('event_p1', 50, 25), + ('event_p2_sub1', 150, 25), + ('event_p2_sub2', 150, 75)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + let publication_name = "test_nested_partitioned_pub".to_string(); + database + .create_publication(&publication_name, std::slice::from_ref(&table_name)) + .await + .unwrap(); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + // Register notification for initial copy completion. + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name, + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + + parent_sync_done.notified().await; + + // Verify initial COPY replicated all 3 rows. 
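+    // (One row lands in each leaf: 'event_p1' in p1, 'event_p2_sub1' in
+    // p2_sub1 and 'event_p2_sub2' in p2_sub2, all surfaced under the root
+    // table's OID.)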
+    let table_rows = destination.get_table_rows().await;
+    let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum();
+    assert_eq!(total_rows, 3);
+
+    // Verify only the parent table is tracked (not intermediate or leaf partitions).
+    let table_states = state_store.get_table_replication_states().await;
+    assert!(table_states.contains_key(&parent_table_id));
+    assert_eq!(table_states.len(), 1);
+
+    // Verify all rows are attributed to the parent table.
+    let parent_table_rows = table_rows
+        .iter()
+        .filter(|(table_id, _)| **table_id == parent_table_id)
+        .map(|(_, rows)| rows.len())
+        .sum::<usize>();
+    assert_eq!(parent_table_rows, 3);
+
+    // Insert new rows into different nested partitions.
+    let inserts_notify = destination
+        .wait_for_events_count(vec![(EventType::Insert, 3)])
+        .await;
+
+    database
+        .run_sql(&format!(
+            "insert into {} (data, partition_key, sub_partition_key) values
+            ('new_event_p1', 75, 30),
+            ('new_event_p2_sub1', 125, 40),
+            ('new_event_p2_sub2', 175, 60)",
+            table_name.as_quoted_identifier()
+        ))
+        .await
+        .unwrap();
+
+    inserts_notify.notified().await;
+
+    let _ = pipeline.shutdown_and_wait().await;
+
+    // Verify that events were captured for all nested partitions.
+    let events = destination.get_events().await;
+    let grouped = group_events_by_type_and_table_id(&events);
+    let parent_inserts = grouped
+        .get(&(EventType::Insert, parent_table_id))
+        .cloned()
+        .unwrap_or_default();
+    assert_eq!(parent_inserts.len(), 3);
+}
+
 /// Tests that the system doesn't crash abruptly when `publish_via_partition_root` is set to `false`.
 ///
 /// The current behavior is to silently skip replication, but we might want to refine this behavior

From 0ac142a4c2ec0021623067cc44afe6b3f5152283 Mon Sep 17 00:00:00 2001
From: Riccardo Busetti
Date: Fri, 24 Oct 2025 17:16:14 +0200
Subject: [PATCH 19/26] Improve

---
 etl/tests/pipeline_with_partitioned_table.rs | 80 ++++++++++++++++++--
 1 file changed, 74 insertions(+), 6 deletions(-)

diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs
index 7d6a82e3f..04f12471c 100644
--- a/etl/tests/pipeline_with_partitioned_table.rs
+++ b/etl/tests/pipeline_with_partitioned_table.rs
@@ -13,6 +13,7 @@ use etl::types::PipelineId;
 use etl::types::TableId;
 use etl_telemetry::tracing::init_test_tracing;
 use rand::random;
+use tokio_postgres::types::Type;
 
 /// Tests that initial COPY replicates all rows from a partitioned table.
 /// Only the parent table is tracked, not individual child partitions.
@@ -70,13 +71,43 @@ async fn partitioned_table_copy_replicates_existing_data() {
 
     let _ = pipeline.shutdown_and_wait().await;
 
+    // Verify table schema was discovered correctly.
+    let table_schemas = state_store.get_table_schemas().await;
+    assert!(table_schemas.contains_key(&parent_table_id));
+
+    let parent_schema = &table_schemas[&parent_table_id];
+    assert_eq!(parent_schema.id, parent_table_id);
+    assert_eq!(parent_schema.name, table_name);
+
+    // Verify columns are correctly discovered.
+    assert_eq!(parent_schema.column_schemas.len(), 3);
+
+    // Check id column (added by default).
+    let id_column = &parent_schema.column_schemas[0];
+    assert_eq!(id_column.name, "id");
+    assert_eq!(id_column.typ, Type::INT8);
+    assert!(!id_column.nullable);
+    assert!(id_column.primary);
+
+    // Check data column.
+ let data_column = &parent_schema.column_schemas[1]; + assert_eq!(data_column.name, "data"); + assert_eq!(data_column.typ, Type::TEXT); + assert!(!data_column.nullable); + assert!(!data_column.primary); + + // Check partition_key column. + let partition_key_column = &parent_schema.column_schemas[2]; + assert_eq!(partition_key_column.name, "partition_key"); + assert_eq!(partition_key_column.typ, Type::INT4); + assert!(!partition_key_column.nullable); + assert!(partition_key_column.primary); + let table_rows = destination.get_table_rows().await; let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum(); - assert_eq!(total_rows, 3); let table_states = state_store.get_table_replication_states().await; - assert!(table_states.contains_key(&parent_table_id)); assert_eq!(table_states.len(), 1); @@ -1137,8 +1168,7 @@ async fn nested_partitioned_table_copy_and_cdc() { database .run_sql(&format!( "create table {} partition of {} for values from (1) to (50)", - p2_sub1_qualified, - p2_qualified + p2_sub1_qualified, p2_qualified )) .await .unwrap(); @@ -1148,8 +1178,7 @@ async fn nested_partitioned_table_copy_and_cdc() { database .run_sql(&format!( "create table {} partition of {} for values from (50) to (100)", - p2_sub2_qualified, - p2_qualified + p2_sub2_qualified, p2_qualified )) .await .unwrap(); @@ -1196,6 +1225,45 @@ async fn nested_partitioned_table_copy_and_cdc() { parent_sync_done.notified().await; + // Verify table schema was discovered correctly for nested partitioned table. + let table_schemas = state_store.get_table_schemas().await; + assert!(table_schemas.contains_key(&parent_table_id)); + + let parent_schema = &table_schemas[&parent_table_id]; + assert_eq!(parent_schema.id, parent_table_id); + assert_eq!(parent_schema.name, table_name); + + // Verify columns are correctly discovered (includes sub_partition_key). + assert_eq!(parent_schema.column_schemas.len(), 4); + + // Check id column (added by default). + let id_column = &parent_schema.column_schemas[0]; + assert_eq!(id_column.name, "id"); + assert_eq!(id_column.typ, Type::INT8); + assert!(!id_column.nullable); + assert!(id_column.primary); + + // Check data column. + let data_column = &parent_schema.column_schemas[1]; + assert_eq!(data_column.name, "data"); + assert_eq!(data_column.typ, Type::TEXT); + assert!(!data_column.nullable); + assert!(!data_column.primary); + + // Check partition_key column (part of primary key). + let partition_key_column = &parent_schema.column_schemas[2]; + assert_eq!(partition_key_column.name, "partition_key"); + assert_eq!(partition_key_column.typ, Type::INT4); + assert!(!partition_key_column.nullable); + assert!(partition_key_column.primary); + + // Check sub_partition_key column (part of primary key for nested partitioning). + let sub_partition_key_column = &parent_schema.column_schemas[3]; + assert_eq!(sub_partition_key_column.name, "sub_partition_key"); + assert_eq!(sub_partition_key_column.typ, Type::INT4); + assert!(!sub_partition_key_column.nullable); + assert!(sub_partition_key_column.primary); + // Verify initial COPY replicated all 3 rows. 
let table_rows = destination.get_table_rows().await; let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum(); From 3fec642b433a64c0ddfe30e430384d6db5e58885 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 17:20:44 +0200 Subject: [PATCH 20/26] Improve --- etl/tests/pipeline_with_partitioned_table.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 04f12471c..90ea4cff4 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -1167,8 +1167,7 @@ async fn nested_partitioned_table_copy_and_cdc() { let p2_sub1_qualified = format!("{}.{}", table_name.schema, p2_sub1_name); database .run_sql(&format!( - "create table {} partition of {} for values from (1) to (50)", - p2_sub1_qualified, p2_qualified + "create table {p2_sub1_qualified} partition of {p2_qualified} for values from (1) to (50)" )) .await .unwrap(); @@ -1177,8 +1176,7 @@ async fn nested_partitioned_table_copy_and_cdc() { let p2_sub2_qualified = format!("{}.{}", table_name.schema, p2_sub2_name); database .run_sql(&format!( - "create table {} partition of {} for values from (50) to (100)", - p2_sub2_qualified, p2_qualified + "create table {p2_sub2_qualified} partition of {p2_qualified} for values from (50) to (100)" )) .await .unwrap(); From bf4ef9ffe2d693621126a0440d18e56045cd4fee Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 Oct 2025 10:12:10 +0100 Subject: [PATCH 21/26] Improve --- etl/src/replication/client.rs | 18 ++-------- etl/tests/pipeline_with_partitioned_table.rs | 37 ++++++++++++++++---- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index 5a03e0d6c..ef4d1e7bd 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -426,27 +426,13 @@ impl PgReplicationClient { let query = format!( r#" with recursive pub_tables as ( - -- Get explicit tables from publication (for regular publications) - select r.prrelid as oid - from pg_publication_rel r - join pg_publication p on p.oid = r.prpubid - where p.pubname = {pub} - - union all - - -- Get tables from pg_publication_tables (for ALL TABLES and FOR TABLES IN SCHEMA) - -- Only executes if pg_publication_rel is empty for this publication + -- Get all tables from publication (pg_publication_tables includes explicit tables, + -- ALL TABLES publications, and FOR TABLES IN SCHEMA publications) select c.oid from pg_publication_tables pt join pg_class c on c.relname = pt.tablename join pg_namespace n on n.oid = c.relnamespace and n.nspname = pt.schemaname where pt.pubname = {pub} - and not exists ( - select 1 - from pg_publication_rel r - join pg_publication p on p.oid = r.prpubid - where p.pubname = {pub} - ) ), hierarchy(relid) as ( -- Start with published tables diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 90ea4cff4..8a8b4161c 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -198,13 +198,6 @@ async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { let _ = pipeline.shutdown_and_wait().await; let table_rows = destination.get_table_rows().await; - let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum(); - assert_eq!(total_rows, 2); - - let table_states = state_store.get_table_replication_states().await; - 
assert!(table_states.contains_key(&parent_table_id)); - assert_eq!(table_states.len(), 1); - let parent_table_rows = table_rows .iter() .filter(|(table_id, _)| **table_id == parent_table_id) @@ -296,6 +289,21 @@ async fn partition_drop_does_not_emit_delete_or_truncate() { .await .unwrap(); + // Insert a row into an existing partition to ensure the pipeline is still processing events. + let inserts_notify = destination + .wait_for_events_count(vec![(EventType::Insert, 1)]) + .await; + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('event3', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + inserts_notify.notified().await; + let _ = pipeline.shutdown_and_wait().await; let events_after = destination.get_events().await; @@ -447,6 +455,21 @@ async fn child_table_truncate_does_not_emit_truncate_event() { .await .unwrap(); + // Insert a row into an existing partition to ensure the pipeline is still processing events. + let inserts_notify = destination + .wait_for_events_count(vec![(EventType::Insert, 1)]) + .await; + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('event3', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + inserts_notify.notified().await; + let _ = pipeline.shutdown_and_wait().await; let events = destination.get_events().await; From dbede951064454d86cf8f038930cacbc3f52309b Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 Oct 2025 11:31:04 +0100 Subject: [PATCH 22/26] Improve --- etl/src/pipeline.rs | 36 ++++++++++ etl/src/replication/client.rs | 54 ++++++++++++++ etl/tests/pipeline_with_partitioned_table.rs | 74 +++++++------------- 3 files changed, 117 insertions(+), 47 deletions(-) diff --git a/etl/src/pipeline.rs b/etl/src/pipeline.rs index e021886d4..d5eb7b3c7 100644 --- a/etl/src/pipeline.rs +++ b/etl/src/pipeline.rs @@ -300,6 +300,42 @@ where publication_table_ids.len() ); + // Validate that the publication is configured correctly for partitioned tables. + // + // When `publish_via_partition_root = false`, logical replication messages contain + // child partition OIDs instead of parent table OIDs. Since our schema cache only + // contains parent table IDs (from `get_publication_table_ids`), relation messages + // with child OIDs would cause pipeline failures. + let publish_via_partition_root = replication_client + .get_publish_via_partition_root(&self.config.publication_name) + .await?; + + if !publish_via_partition_root { + let has_partitioned_tables = replication_client + .has_partitioned_tables(&publication_table_ids) + .await?; + + if has_partitioned_tables { + error!( + "publication '{}' has publish_via_partition_root=false but contains partitioned table(s)", + self.config.publication_name + ); + + bail!( + ErrorKind::ConfigError, + "Invalid publication configuration for partitioned tables", + format!( + "The publication '{}' contains partitioned tables but has publish_via_partition_root=false. \ + This configuration causes replication messages to use child partition OIDs, which are not \ + tracked by the pipeline and will cause failures. 
Please recreate the publication with \
+                    publish_via_partition_root=true or use: ALTER PUBLICATION {} SET (publish_via_partition_root = true);",
+                    self.config.publication_name,
+                    self.config.publication_name
+                )
+            );
+        }
+    }
+
     self.store.load_table_replication_states().await?;
 
     let table_replication_states = self.store.get_table_replication_states().await?;
diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs
index ef4d1e7bd..40e617584 100644
--- a/etl/src/replication/client.rs
+++ b/etl/src/replication/client.rs
@@ -387,6 +387,60 @@ impl PgReplicationClient {
         Ok(false)
     }
 
+    /// Retrieves the `publish_via_partition_root` setting for a publication.
+    ///
+    /// Returns `true` if the publication is configured to send replication messages using
+    /// the parent table OID, or `false` if it sends them using child partition OIDs.
+    pub async fn get_publish_via_partition_root(&self, publication: &str) -> EtlResult<bool> {
+        let query = format!(
+            "select pubviaroot from pg_publication where pubname = {};",
+            quote_literal(publication)
+        );
+
+        for msg in self.client.simple_query(&query).await? {
+            if let SimpleQueryMessage::Row(row) = msg {
+                let pubviaroot =
+                    Self::get_row_value::<String>(&row, "pubviaroot", "pg_publication").await?;
+                return Ok(pubviaroot == "t");
+            }
+        }
+
+        bail!(
+            ErrorKind::ConfigError,
+            "Publication not found",
+            format!("Publication '{}' not found in database", publication)
+        );
+    }
+
+    /// Checks if any of the provided table IDs are partitioned tables.
+    ///
+    /// A partitioned table is one where `relkind = 'p'` in `pg_class`.
+    /// Returns `true` if at least one table is partitioned, `false` otherwise.
+    pub async fn has_partitioned_tables(&self, table_ids: &[TableId]) -> EtlResult<bool> {
+        if table_ids.is_empty() {
+            return Ok(false);
+        }
+
+        let table_oids_list = table_ids
+            .iter()
+            .map(|id| id.0.to_string())
+            .collect::<Vec<_>>()
+            .join(", ");
+
+        let query = format!(
+            "select 1 from pg_class where oid in ({}) and relkind = 'p' limit 1;",
+            table_oids_list
+        );
+
+        for msg in self.client.simple_query(&query).await? {
+            if let SimpleQueryMessage::Row(_) = msg {
+                return Ok(true);
+            }
+        }
+
+        Ok(false)
+    }
+
     /// Retrieves the names of all tables included in a publication.
     pub async fn get_publication_table_names(
         &self,
diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs
index 8a8b4161c..8dae9dfc0 100644
--- a/etl/tests/pipeline_with_partitioned_table.rs
+++ b/etl/tests/pipeline_with_partitioned_table.rs
@@ -1333,12 +1333,16 @@ async fn nested_partitioned_table_copy_and_cdc() {
     assert_eq!(parent_inserts.len(), 3);
 }
 
-/// Tests that the system doesn't crash abruptly when `publish_via_partition_root` is set to `false`.
+/// Tests that the pipeline throws an error during startup when `publish_via_partition_root`
+/// is set to `false` and the publication contains partitioned tables.
 ///
-/// The current behavior is to silently skip replication, but we might want to refine this behavior
-/// and throw an error when we detect that there are partitioned tables in a publication and the setting
-/// is `false`. This way, we would be able to avoid forcing the user to always set `publish_via_partition_root=true`
-/// when it's unnecessary.
+/// When `publish_via_partition_root = false`, logical replication messages contain child
+/// partition OIDs instead of parent table OIDs.
Since the pipeline's schema cache only +/// tracks parent table IDs, this configuration would cause pipeline failures when relation +/// messages arrive with unknown child OIDs. +/// +/// The pipeline validates this configuration at startup and rejects it with a clear error +/// message instructing the user to enable `publish_via_partition_root`. #[tokio::test(flavor = "multi_thread")] async fn partitioned_table_with_publish_via_root_false() { init_test_tracing(); @@ -1347,7 +1351,7 @@ async fn partitioned_table_with_publish_via_root_false() { let table_name = test_table_name("partitioned_events"); let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; - let (parent_table_id, _partition_table_ids) = + let (_parent_table_id, _partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await .unwrap(); @@ -1373,51 +1377,27 @@ async fn partitioned_table_with_publish_via_root_false() { let mut pipeline = create_pipeline( &database.config, pipeline_id, - publication_name, + publication_name.clone(), state_store.clone(), destination.clone(), ); - // Wait on the sync done of the parent. - let parent_sync_done = state_store - .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) - .await; - - pipeline.start().await.unwrap(); - - // Wait on the sync done of the parent. - parent_sync_done.notified().await; - - // Wait for the COMMIT event of the insert in the parent table. COMMIT events are always - // processed unconditionally because they don't contain relation-specific information. - // - // We use the COMMIT event to verify transaction processing: we can check whether the - // transaction's component events were captured. In this case, they should NOT be present - // because when `publication_via_partition_root` is `false`, events are tagged with child - // table OIDs. Since these child table OIDs are unknown to us (we always try to find the parent oid), - // those events are skipped. - let commit = destination - .wait_for_events_count(vec![(EventType::Commit, 1)]) - .await; - - database - .run_sql(&format!( - "insert into {} (data, partition_key) values ('event1', 50)", - table_name.as_quoted_identifier() - )) - .await - .unwrap(); - - commit.notified().await; + // The pipeline should fail to start due to invalid configuration. + let start_result = pipeline.start().await; + assert!(start_result.is_err()); - pipeline.shutdown_and_wait().await.unwrap(); + let err = start_result.unwrap_err(); + let err_message = err.to_string(); - // No inserts should be captured for the reasons explained above. - let events = destination.get_events().await; - let grouped_events = group_events_by_type_and_table_id(&events); - let parent_inserts = grouped_events - .get(&(EventType::Insert, parent_table_id)) - .cloned() - .unwrap_or_default(); - assert!(parent_inserts.is_empty()); + // Verify the error message contains the expected information. 
+ assert!( + err_message.contains("publish_via_partition_root"), + "Error message should mention publish_via_partition_root, got: {}", + err_message + ); + assert!( + err_message.contains(&publication_name), + "Error message should mention the publication name, got: {}", + err_message + ); } From 899ef6938bb4f069890abc75499ac704971af76e Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 Oct 2025 11:32:54 +0100 Subject: [PATCH 23/26] Improve --- etl/tests/pipeline_with_partitioned_table.rs | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 8dae9dfc0..826ecc41e 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -14,6 +14,7 @@ use etl::types::TableId; use etl_telemetry::tracing::init_test_tracing; use rand::random; use tokio_postgres::types::Type; +use etl::error::ErrorKind; /// Tests that initial COPY replicates all rows from a partitioned table. /// Only the parent table is tracked, not individual child partitions. @@ -1383,21 +1384,6 @@ async fn partitioned_table_with_publish_via_root_false() { ); // The pipeline should fail to start due to invalid configuration. - let start_result = pipeline.start().await; - assert!(start_result.is_err()); - - let err = start_result.unwrap_err(); - let err_message = err.to_string(); - - // Verify the error message contains the expected information. - assert!( - err_message.contains("publish_via_partition_root"), - "Error message should mention publish_via_partition_root, got: {}", - err_message - ); - assert!( - err_message.contains(&publication_name), - "Error message should mention the publication name, got: {}", - err_message - ); + let err = pipeline.start().await.err().unwrap(); + assert_eq!(err.kind(), ErrorKind::ConfigError); } From fb43b9d8ab02fefa630c3fcc92b98949e28d26e5 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 Oct 2025 11:35:19 +0100 Subject: [PATCH 24/26] Improve --- etl/src/pipeline.rs | 3 +-- etl/src/replication/client.rs | 3 +-- etl/tests/pipeline_with_partitioned_table.rs | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/etl/src/pipeline.rs b/etl/src/pipeline.rs index d5eb7b3c7..36b4b1dac 100644 --- a/etl/src/pipeline.rs +++ b/etl/src/pipeline.rs @@ -329,8 +329,7 @@ where This configuration causes replication messages to use child partition OIDs, which are not \ tracked by the pipeline and will cause failures. Please recreate the publication with \ publish_via_partition_root=true or use: ALTER PUBLICATION {} SET (publish_via_partition_root = true);", - self.config.publication_name, - self.config.publication_name + self.config.publication_name, self.config.publication_name ) ); } diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index 40e617584..9797679e8 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -428,8 +428,7 @@ impl PgReplicationClient { .join(", "); let query = format!( - "select 1 from pg_class where oid in ({}) and relkind = 'p' limit 1;", - table_oids_list + "select 1 from pg_class where oid in ({table_oids_list}) and relkind = 'p' limit 1;" ); for msg in self.client.simple_query(&query).await? 
{
diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs
index 826ecc41e..794f665ed 100644
--- a/etl/tests/pipeline_with_partitioned_table.rs
+++ b/etl/tests/pipeline_with_partitioned_table.rs
@@ -1,6 +1,7 @@
 #![cfg(feature = "test-utils")]
 
 use etl::destination::memory::MemoryDestination;
+use etl::error::ErrorKind;
 use etl::state::table::TableReplicationPhaseType;
 use etl::test_utils::database::{spawn_source_database, test_table_name};
 use etl::test_utils::event::group_events_by_type_and_table_id;
@@ -14,7 +15,6 @@ use etl::types::TableId;
 use etl_telemetry::tracing::init_test_tracing;
 use rand::random;
 use tokio_postgres::types::Type;
-use etl::error::ErrorKind;
 
From df3044a40cc08c334972a6d8ab23f6e654a94efd Mon Sep 17 00:00:00 2001
From: Riccardo Busetti
Date: Mon, 27 Oct 2025 12:09:41 +0100
Subject: [PATCH 25/26] Improve

---
 etl/tests/pipeline_with_partitioned_table.rs | 47 +++++++++++++++++++-
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs
index 794f665ed..f9bd3a377 100644
--- a/etl/tests/pipeline_with_partitioned_table.rs
+++ b/etl/tests/pipeline_with_partitioned_table.rs
@@ -8,7 +8,9 @@ use etl::test_utils::event::group_events_by_type_and_table_id;
 use etl::test_utils::notify::NotifyingStore;
 use etl::test_utils::pipeline::create_pipeline;
 use etl::test_utils::test_destination_wrapper::TestDestinationWrapper;
-use etl::test_utils::test_schema::create_partitioned_table;
+use etl::test_utils::test_schema::{
+    TableSelection, create_partitioned_table, setup_test_database_schema,
+};
 use etl::types::EventType;
 use etl::types::PipelineId;
 use etl::types::TableId;
@@ -1345,7 +1347,7 @@ async fn nested_partitioned_table_copy_and_cdc() {
 /// The pipeline validates this configuration at startup and rejects it with a clear error
 /// message instructing the user to enable `publish_via_partition_root`.
 #[tokio::test(flavor = "multi_thread")]
-async fn partitioned_table_with_publish_via_root_false() {
+async fn partitioned_table_with_publish_via_partition_root_false_and_partitioned_tables() {
     init_test_tracing();
     let database = spawn_source_database().await;
 
@@ -1387,3 +1389,44 @@ async fn partitioned_table_with_publish_via_root_false() {
     let err = pipeline.start().await.err().unwrap();
     assert_eq!(err.kind(), ErrorKind::ConfigError);
 }
+
+/// Tests that the pipeline doesn't throw an error when `publish_via_partition_root=false` and there
+/// are no partitioned tables in the publication.
+#[tokio::test(flavor = "multi_thread")] +async fn partitioned_table_with_publish_via_partition_root_false_and_no_partitioned_tables() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("non_partitioned_events"); + database + .create_table( + table_name.clone(), + true, + &[("description", "text not null")], + ) + .await + .unwrap(); + + let publication_name = "test_non_partitioned_pub".to_string(); + database + .create_publication_with_config(&publication_name, std::slice::from_ref(&table_name), false) + .await + .unwrap(); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name.clone(), + state_store.clone(), + destination.clone(), + ); + + // The pipeline should start and stop successfully. + pipeline.start().await.unwrap(); + let result = pipeline.shutdown_and_wait().await; + assert!(result.is_ok()); +} From 17b0357326769016f9bc0d56f3667edd4872a345 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 Oct 2025 12:17:16 +0100 Subject: [PATCH 26/26] Improve --- etl/tests/pipeline_with_partitioned_table.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index f9bd3a377..f096bff1f 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -8,9 +8,7 @@ use etl::test_utils::event::group_events_by_type_and_table_id; use etl::test_utils::notify::NotifyingStore; use etl::test_utils::pipeline::create_pipeline; use etl::test_utils::test_destination_wrapper::TestDestinationWrapper; -use etl::test_utils::test_schema::{ - TableSelection, create_partitioned_table, setup_test_database_schema, -}; +use etl::test_utils::test_schema::create_partitioned_table; use etl::types::EventType; use etl::types::PipelineId; use etl::types::TableId;
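As a closing note on the validation introduced in patch 22: the same check the pipeline now performs at startup can be run by hand before deploying. A rough psql equivalent for a publication created with an explicit table list (the name `my_publication` is illustrative):

```sql
-- Does the publication route changes through partition roots?
select pubviaroot from pg_publication where pubname = 'my_publication';

-- Are any of the publication's declared tables partitioned (relkind = 'p')?
select r.prrelid::regclass
from pg_publication_rel r
join pg_publication p on p.oid = r.prpubid
join pg_class c on c.oid = r.prrelid
where p.pubname = 'my_publication' and c.relkind = 'p';

-- If the first query returns 'f' and the second returns rows, fix it with:
alter publication my_publication set (publish_via_partition_root = true);
```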