From 282b0e62623824d645ccd4e1a1ab356f6792b25e Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 21 Oct 2025 12:42:50 +0200 Subject: [PATCH 01/26] feat(core): Add partitioned table support --- docs/how-to/configure-postgres.md | 22 ++ etl-api/src/db/publications.rs | 3 + etl-postgres/src/tokio/test_utils.rs | 2 +- etl/src/replication/client.rs | 120 ++++++-- etl/src/test_utils/test_schema.rs | 71 +++++ etl/src/types/mod.rs | 2 +- etl/tests/pipeline_with_partitioned_table.rs | 303 +++++++++++++++++++ etl/tests/replication.rs | 43 ++- 8 files changed, 544 insertions(+), 22 deletions(-) create mode 100644 etl/tests/pipeline_with_partitioned_table.rs diff --git a/docs/how-to/configure-postgres.md b/docs/how-to/configure-postgres.md index fe57cd07b..bf99db0a4 100644 --- a/docs/how-to/configure-postgres.md +++ b/docs/how-to/configure-postgres.md @@ -116,6 +116,28 @@ CREATE PUBLICATION all_tables FOR ALL TABLES; CREATE PUBLICATION inserts_only FOR TABLE users WITH (publish = 'insert'); ``` +#### Partitioned Tables + +If you want to replicate partitioned tables, you must use `publish_via_partition_root = true` when creating your publication. This option tells Postgres to treat the [partitioned table as a single table](https://www.postgresql.org/docs/current/sql-createpublication.html#SQL-CREATEPUBLICATION-PARAMS-WITH-PUBLISH-VIA-PARTITION-ROOT) from the replication perspective, rather than replicating each partition individually. All changes to any partition will be published as changes to the parent table: + +```sql +-- Create publication with partitioned table support +CREATE PUBLICATION my_publication FOR TABLE users, orders WITH (publish_via_partition_root = true); + +-- For all tables including partitioned tables +CREATE PUBLICATION all_tables FOR ALL TABLES WITH (publish_via_partition_root = true); +``` + +**Limitation:** If this option is enabled, `TRUNCATE` operations performed directly on individual partitions are not replicated. To replicate a truncate operation, you must execute it on the parent table instead: + +```sql +-- This will NOT be replicated +TRUNCATE TABLE orders_2024_q1; + +-- This WILL be replicated +TRUNCATE TABLE orders; +``` + ### Managing Publications ```sql diff --git a/etl-api/src/db/publications.rs b/etl-api/src/db/publications.rs index 38a4ab575..5c01ac859 100644 --- a/etl-api/src/db/publications.rs +++ b/etl-api/src/db/publications.rs @@ -43,6 +43,9 @@ pub async fn create_publication( } } + // Ensure partitioned tables publish via ancestor/root schema for logical replication + query.push_str(" with (publish_via_partition_root = true)"); + pool.execute(query.as_str()).await?; Ok(()) } diff --git a/etl-postgres/src/tokio/test_utils.rs b/etl-postgres/src/tokio/test_utils.rs index 6692e486b..7aad2b62b 100644 --- a/etl-postgres/src/tokio/test_utils.rs +++ b/etl-postgres/src/tokio/test_utils.rs @@ -61,7 +61,7 @@ impl PgDatabase { .collect::>(); let create_publication_query = format!( - "create publication {} for table {}", + "create publication {} for table {} with (publish_via_partition_root = true)", publication_name, table_names.join(", ") ); diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index 2746f6c47..16f72a296 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -407,28 +407,66 @@ impl PgReplicationClient { } /// Retrieves the OIDs of all tables included in a publication. + /// + /// For partitioned tables with `publish_via_partition_root=true`, this returns only the parent + /// table OID. 
The query uses a recursive CTE to walk up the partition inheritance hierarchy + /// and identify root tables that have no parent themselves. pub async fn get_publication_table_ids( &self, publication_name: &str, ) -> EtlResult> { - let publication_query = format!( - "select c.oid from pg_publication_tables pt - join pg_class c on c.relname = pt.tablename - join pg_namespace n on n.oid = c.relnamespace AND n.nspname = pt.schemaname - where pt.pubname = {};", - quote_literal(publication_name) + let query = format!( + r#" + with recursive has_rel as ( + -- Check if publication uses pg_publication_rel (explicit table list) + select exists( + select 1 + from pg_publication_rel r + join pg_publication p on p.oid = r.prpubid + where p.pubname = {pub} + ) as has + ), + pub_tables as ( + -- If publication has explicit relations, use pg_publication_rel + select r.prrelid as oid + from pg_publication_rel r + join pg_publication p on p.oid = r.prpubid + where p.pubname = {pub} and (select has from has_rel) + union all + -- Otherwise, use pg_publication_tables (for ALL TABLES publications) + select c.oid + from pg_publication_tables pt + join pg_class c on c.relname = pt.tablename + where pt.pubname = {pub} and not (select has from has_rel) + ), + recurse(relid) as ( + -- Start with all published tables + select oid from pub_tables + union all + -- Recursively walk up to find parent tables in inheritance hierarchy + select i.inhparent + from pg_inherits i + join recurse r on r.relid = i.inhrelid + ) + -- Return only root tables (those without a parent) + select distinct relid as oid + from recurse r + where not exists ( + select 1 from pg_inherits i where i.inhrelid = r.relid + ); + "#, + pub = quote_literal(publication_name) ); - let mut table_ids = vec![]; - for msg in self.client.simple_query(&publication_query).await? { + let mut roots = vec![]; + for msg in self.client.simple_query(&query).await? { if let SimpleQueryMessage::Row(row) = msg { - // For the sake of simplicity, we refer to the table oid as table id. let table_id = Self::get_row_value::(&row, "oid", "pg_class").await?; - table_ids.push(table_id); + roots.push(table_id); } } - Ok(table_ids) + Ok(roots) } /// Starts a logical replication stream from the specified publication and slot. 
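+    ///
+    /// With `publish_via_partition_root = true`, changes to any child partition are decoded
+    /// under the root partitioned table's identity, so the stream only ever carries the
+    /// parent's relation OID (documented Postgres behavior, not something this client enforces).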
@@ -662,7 +700,8 @@ impl PgReplicationClient { when 0 then true else (a.attnum in (select * from pub_attrs)) end - )", + )" + .to_string(), ) } else { // Postgres 14 or earlier or unknown, fallback to no column-level filtering @@ -677,20 +716,65 @@ impl PgReplicationClient { )", publication = quote_literal(publication), ), - "and (select count(*) from pub_table) > 0", + format!( + "and ((select count(*) from pub_table) > 0 or exists( + -- Also allow if parent table is in publication (for partitioned tables) + select 1 from pg_inherits i + join pg_publication_rel r on r.prrelid = i.inhparent + join pg_publication p on p.oid = r.prpubid + where i.inhrelid = {table_id} and p.pubname = {publication} + ))", + publication = quote_literal(publication), + ), ) } } else { - ("".into(), "") + ("".to_string(), "".to_string()) + }; + + let has_pub_cte = !pub_cte.is_empty(); + + let cte_prefix = if has_pub_cte { + // If there's already a pub_cte WITH clause, add our CTEs to it with a comma + format!("{pub_cte},") + } else { + // If no pub_cte, start our own WITH clause (no need for RECURSIVE) + "with ".to_string() }; let column_info_query = format!( - "{pub_cte} + "{cte_prefix} + -- Find direct parent of current table (if it's a partition) + direct_parent as ( + select i.inhparent as parent_oid + from pg_inherits i + where i.inhrelid = {table_id}::oid + limit 1 + ), + -- Get parent table's primary key columns + parent_pk_cols as ( + select array_agg(a.attname order by x.n) as pk_column_names + from pg_constraint con + join unnest(con.conkey) with ordinality as x(attnum, n) on true + join pg_attribute a on a.attrelid = con.conrelid and a.attnum = x.attnum + join direct_parent dp on con.conrelid = dp.parent_oid + where con.contype = 'p' + group by con.conname + ) select a.attname, a.atttypid, a.atttypmod, a.attnotnull, - coalesce(i.indisprimary, false) as primary + case + -- Direct primary key on this relation + when coalesce(i.indisprimary, false) = true then true + -- Inherit primary key from parent partitioned table if column name matches + when exists ( + select 1 from parent_pk_cols pk + where a.attname = any(pk.pk_column_names) + ) then true + else false + end as primary from pg_attribute a left join pg_index i on a.attrelid = i.indrelid @@ -807,9 +891,9 @@ impl PgReplicationClient { ) } else { format!( - r#"copy {} ({}) to stdout with (format text);"#, - table_name.as_quoted_identifier(), + r#"copy (select {} from {}) to stdout with (format text);"#, column_list, + table_name.as_quoted_identifier(), ) }; diff --git a/etl/src/test_utils/test_schema.rs b/etl/src/test_utils/test_schema.rs index 8faa6449b..2e4047fc5 100644 --- a/etl/src/test_utils/test_schema.rs +++ b/etl/src/test_utils/test_schema.rs @@ -127,6 +127,77 @@ pub async fn setup_test_database_schema( } } +/// Creates a partitioned table with the given name and partitions. +/// +/// This function creates: +/// 1. A parent partitioned table with a primary key +/// 2. Several child partitions based on the provided partition specifications +/// +/// Returns the table ID of the parent table and a list of partition table IDs. 
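+///
+/// As a sketch (table, schema, and quoting assumed for illustration), a spec of
+/// `("p1", "from (1) to (100)")` for a table `public.events` expands to:
+///
+/// ```sql
+/// create table "public"."events" (
+///     id bigserial,
+///     data text NOT NULL,
+///     partition_key integer NOT NULL,
+///     primary key (id, partition_key)
+/// ) partition by range (partition_key);
+///
+/// create table "public"."events_p1" partition of "public"."events"
+///     for values from (1) to (100);
+/// ```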
+pub async fn create_partitioned_table( + database: &PgDatabase, + table_name: TableName, + partition_specs: &[(&str, &str)], // (partition_name, partition_constraint) +) -> Result<(TableId, Vec), tokio_postgres::Error> { + let create_parent_query = format!( + "create table {} ( + id bigserial, + data text NOT NULL, + partition_key integer NOT NULL, + primary key (id, partition_key) + ) partition by range (partition_key)", + table_name.as_quoted_identifier() + ); + + database.run_sql(&create_parent_query).await?; + + let parent_row = database + .client + .as_ref() + .unwrap() + .query_one( + "select c.oid from pg_class c join pg_namespace n on n.oid = c.relnamespace + where n.nspname = $1 and c.relname = $2", + &[&table_name.schema, &table_name.name], + ) + .await?; + + let parent_table_id: TableId = parent_row.get(0); + let mut partition_table_ids = Vec::new(); + + for (partition_name, partition_constraint) in partition_specs { + let partition_table_name = TableName::new( + table_name.schema.clone(), + format!("{}_{}", table_name.name, partition_name), + ); + + let create_partition_query = format!( + "create table {} partition of {} for values {}", + partition_table_name.as_quoted_identifier(), + table_name.as_quoted_identifier(), + partition_constraint + ); + + database.run_sql(&create_partition_query).await?; + + let partition_row = database + .client + .as_ref() + .unwrap() + .query_one( + "select c.oid from pg_class c join pg_namespace n on n.oid = c.relnamespace + where n.nspname = $1 and c.relname = $2", + &[&partition_table_name.schema, &partition_table_name.name], + ) + .await?; + + let partition_table_id: TableId = partition_row.get(0); + partition_table_ids.push(partition_table_id); + } + + Ok((parent_table_id, partition_table_ids)) +} + /// Inserts users data into the database for testing purposes. pub async fn insert_users_data( client: &mut PgDatabase, diff --git a/etl/src/types/mod.rs b/etl/src/types/mod.rs index 05103904c..867af9a23 100644 --- a/etl/src/types/mod.rs +++ b/etl/src/types/mod.rs @@ -13,7 +13,7 @@ pub use event::*; pub use pipeline::*; pub use table_row::*; -pub use crate::conversions::numeric::PgNumeric; +pub use crate::conversions::numeric::{PgNumeric, Sign}; // Re-exports. pub use etl_postgres::types::*; diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs new file mode 100644 index 000000000..a40e69b49 --- /dev/null +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -0,0 +1,303 @@ +#![cfg(feature = "test-utils")] + +use etl::destination::memory::MemoryDestination; +use etl::state::table::TableReplicationPhaseType; +use etl::test_utils::database::{spawn_source_database, test_table_name}; +use etl::test_utils::event::group_events_by_type_and_table_id; +use etl::test_utils::notify::NotifyingStore; +use etl::test_utils::pipeline::create_pipeline; +use etl::test_utils::test_destination_wrapper::TestDestinationWrapper; +use etl::test_utils::test_schema::create_partitioned_table; +use etl::types::EventType; +use etl::types::PipelineId; +use etl_telemetry::tracing::init_test_tracing; +use rand::random; + +/// Initial copy for a partitioned table (published via root) copies all existing rows. 
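+///
+/// The initial copy must use the `copy (select ...) to stdout` form rather than
+/// `copy <table> to stdout`: Postgres rejects a direct `COPY ... TO` on a partitioned
+/// parent, while the `select` form reads through every partition.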
+#[tokio::test(flavor = "multi_thread")] +async fn partitioned_table_copy_replicates_existing_data() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("partitioned_events"); + let partition_specs = [ + ("p1", "from (1) to (100)"), + ("p2", "from (100) to (200)"), + ("p3", "from (200) to (300)"), + ]; + + let (parent_table_id, _partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values + ('event1', 50), ('event2', 150), ('event3', 250)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + let publication_name = "test_partitioned_pub".to_string(); + database + .create_publication(&publication_name, std::slice::from_ref(&table_name)) + .await + .expect("Failed to create publication"); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + // Register notification for initial copy completion. + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name, + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + + parent_sync_done.notified().await; + + let _ = pipeline.shutdown_and_wait().await; + + let table_rows = destination.get_table_rows().await; + let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum(); + + assert_eq!( + total_rows, 3, + "Expected 3 rows synced (one per partition), but got {total_rows}" + ); + + let table_states = state_store.get_table_replication_states().await; + + assert!( + table_states.contains_key(&parent_table_id), + "Parent table should be tracked in state" + ); + assert_eq!( + table_states.len(), + 1, + "Only the parent table should be tracked in state" + ); + + let parent_table_rows = table_rows + .iter() + .filter(|(table_id, _)| **table_id == parent_table_id) + .map(|(_, rows)| rows.len()) + .sum::(); + assert_eq!( + parent_table_rows, 3, + "Parent table should contain all rows when publishing via root" + ); +} + +/// Initial copy completes and CDC streams new rows from newly added partitions. 
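+///
+/// No publication change is needed when a partition is added. A sketch of the DDL/DML this
+/// test runs (table and schema names assumed):
+///
+/// ```sql
+/// create table public.partitioned_events_late_p3
+///     partition of public.partitioned_events_late
+///     for values from (200) to (300);
+///
+/// -- This row lands in the new partition and streams under the parent's OID.
+/// insert into public.partitioned_events_late (data, partition_key) values ('event3', 250);
+/// ```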
+#[tokio::test(flavor = "multi_thread")] +async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("partitioned_events_late"); + let initial_partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; + + let (parent_table_id, _initial_partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &initial_partition_specs) + .await + .expect("Failed to create initial partitioned table"); + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + let publication_name = "test_partitioned_pub_late".to_string(); + database + .create_publication(&publication_name, std::slice::from_ref(&table_name)) + .await + .expect("Failed to create publication"); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + // Register notification for initial copy completion. + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name, + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + + parent_sync_done.notified().await; + + let new_partition_name = format!("{}_{}", table_name.name, "p3"); + let new_partition_qualified_name = format!("{}.{}", table_name.schema, new_partition_name); + database + .run_sql(&format!( + "create table {} partition of {} for values from (200) to (300)", + new_partition_qualified_name, + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('event3', 250)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Wait for CDC to deliver the new row. + let inserts_notify = destination + .wait_for_events_count(vec![(EventType::Insert, 1)]) + .await; + inserts_notify.notified().await; + + let _ = pipeline.shutdown_and_wait().await; + + let table_rows = destination.get_table_rows().await; + let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum(); + assert_eq!( + total_rows, 2, + "Expected 2 rows synced from initial copy, got {total_rows}" + ); + + let table_states = state_store.get_table_replication_states().await; + assert!(table_states.contains_key(&parent_table_id)); + assert_eq!(table_states.len(), 1); + + let parent_table_rows = table_rows + .iter() + .filter(|(table_id, _)| **table_id == parent_table_id) + .map(|(_, rows)| rows.len()) + .sum::(); + assert_eq!(parent_table_rows, 2); + + let events = destination.get_events().await; + let grouped = group_events_by_type_and_table_id(&events); + let parent_inserts = grouped + .get(&(EventType::Insert, parent_table_id)) + .cloned() + .unwrap_or_default(); + assert_eq!(parent_inserts.len(), 1); +} + +/// Dropping a child partition must not emit DELETE/TRUNCATE events. 
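+///
+/// A sketch of the DDL exercised here (table and schema names assumed):
+///
+/// ```sql
+/// alter table public.partitioned_events_drop
+///     detach partition public.partitioned_events_drop_p1;
+/// drop table public.partitioned_events_drop_p1;
+/// -- Both statements are DDL; logical decoding emits no DELETE or TRUNCATE for them.
+/// ```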
+#[tokio::test(flavor = "multi_thread")] +async fn partition_drop_does_not_emit_delete_or_truncate() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("partitioned_events_drop"); + let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; + + let (parent_table_id, _partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + let publication_name = "test_partitioned_pub_drop".to_string(); + database + .create_publication(&publication_name, std::slice::from_ref(&table_name)) + .await + .expect("Failed to create publication"); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name, + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + parent_sync_done.notified().await; + + let events_before = destination.get_events().await; + let grouped_before = group_events_by_type_and_table_id(&events_before); + let del_before = grouped_before + .get(&(EventType::Delete, parent_table_id)) + .map(|v| v.len()) + .unwrap_or(0); + let trunc_before = grouped_before + .get(&(EventType::Truncate, parent_table_id)) + .map(|v| v.len()) + .unwrap_or(0); + + // Detach and drop one child partition (DDL should not generate DML events) + let child_p1_name = format!("{}_{}", table_name.name, "p1"); + let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + database + .run_sql(&format!( + "alter table {} detach partition {}", + table_name.as_quoted_identifier(), + child_p1_qualified + )) + .await + .unwrap(); + database + .run_sql(&format!("drop table {child_p1_qualified}")) + .await + .unwrap(); + + let _ = pipeline.shutdown_and_wait().await; + + let events_after = destination.get_events().await; + let grouped_after = group_events_by_type_and_table_id(&events_after); + let del_after = grouped_after + .get(&(EventType::Delete, parent_table_id)) + .map(|v| v.len()) + .unwrap_or(0); + let trunc_after = grouped_after + .get(&(EventType::Truncate, parent_table_id)) + .map(|v| v.len()) + .unwrap_or(0); + + assert_eq!( + del_after, del_before, + "Partition drop must not emit DELETE events" + ); + assert_eq!( + trunc_after, trunc_before, + "Partition drop must not emit TRUNCATE events" + ); +} diff --git a/etl/tests/replication.rs b/etl/tests/replication.rs index a1d9dda1e..21e7ee1c7 100644 --- a/etl/tests/replication.rs +++ b/etl/tests/replication.rs @@ -1,10 +1,13 @@ #![cfg(feature = "test-utils")] +use std::collections::HashSet; + use etl::error::ErrorKind; use etl::replication::client::PgReplicationClient; use etl::test_utils::database::{spawn_source_database, test_table_name}; use etl::test_utils::pipeline::test_slot_name; use etl::test_utils::table::assert_table_schema; +use etl::test_utils::test_schema::create_partitioned_table; use etl_postgres::tokio::test_utils::{TableModification, id_column_schema}; use etl_postgres::types::ColumnSchema; use 
etl_telemetry::tracing::init_test_tracing; @@ -550,11 +553,47 @@ async fn test_publication_creation_and_check() { ); // We check the table ids of the tables in the publication. - let table_ids = parent_client + let table_ids: HashSet<_> = parent_client .get_publication_table_ids("my_publication") + .await + .unwrap() + .into_iter() + .collect(); + assert_eq!(table_ids, HashSet::from([table_1_id, table_2_id])); +} + +#[tokio::test(flavor = "multi_thread")] +async fn test_publication_table_ids_collapse_partitioned_root() { + init_test_tracing(); + let database = spawn_source_database().await; + + let client = PgReplicationClient::connect(database.config.clone()) .await .unwrap(); - assert_eq!(table_ids, vec![table_1_id, table_2_id]); + + // We create a partitioned parent with two child partitions. + let table_name = test_table_name("part_parent"); + let (parent_table_id, _children) = create_partitioned_table( + &database, + table_name.clone(), + &[("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")], + ) + .await + .unwrap(); + + let publication_name = "pub_part_root"; + database + .create_publication(publication_name, std::slice::from_ref(&table_name)) + .await + .unwrap(); + + let id = client + .get_publication_table_ids(publication_name) + .await + .unwrap(); + + // We expect to get only the parent table id. + assert_eq!(id, vec![parent_table_id]); } #[tokio::test(flavor = "multi_thread")] From 0a57cfede8d674f655fb0820313f448eafcb5d5e Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 21 Oct 2025 12:43:18 +0200 Subject: [PATCH 02/26] Improve --- etl/src/replication/client.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index 16f72a296..13c04b89f 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -701,7 +701,7 @@ impl PgReplicationClient { else (a.attnum in (select * from pub_attrs)) end )" - .to_string(), + .to_string(), ) } else { // Postgres 14 or earlier or unknown, fallback to no column-level filtering From cd0b607a2bc16592f9b556b780027f2457d164ed Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 21 Oct 2025 12:54:21 +0200 Subject: [PATCH 03/26] Improve --- etl/src/replication/client.rs | 64 ++++++++++++-------- etl/tests/pipeline_with_partitioned_table.rs | 3 - 2 files changed, 39 insertions(+), 28 deletions(-) diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index 13c04b89f..5d52600d5 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -417,42 +417,45 @@ impl PgReplicationClient { ) -> EtlResult> { let query = format!( r#" - with recursive has_rel as ( - -- Check if publication uses pg_publication_rel (explicit table list) - select exists( - select 1 - from pg_publication_rel r - join pg_publication p on p.oid = r.prpubid - where p.pubname = {pub} - ) as has - ), - pub_tables as ( - -- If publication has explicit relations, use pg_publication_rel + with recursive pub_tables as ( + -- Get explicit tables from publication (for regular publications) select r.prrelid as oid from pg_publication_rel r join pg_publication p on p.oid = r.prpubid - where p.pubname = {pub} and (select has from has_rel) + where p.pubname = {pub} + union all - -- Otherwise, use pg_publication_tables (for ALL TABLES publications) + + -- Get tables from pg_publication_tables (for ALL TABLES publications) + -- Only executes if pg_publication_rel is empty for this publication select c.oid from pg_publication_tables pt join 
pg_class c on c.relname = pt.tablename - where pt.pubname = {pub} and not (select has from has_rel) + join pg_namespace n on n.oid = c.relnamespace and n.nspname = pt.schemaname + where pt.pubname = {pub} + and not exists ( + select 1 + from pg_publication_rel r + join pg_publication p on p.oid = r.prpubid + where p.pubname = {pub} + ) ), - recurse(relid) as ( - -- Start with all published tables + hierarchy(relid) as ( + -- Start with published tables select oid from pub_tables - union all - -- Recursively walk up to find parent tables in inheritance hierarchy + + union + + -- Recursively find parent tables in inheritance hierarchy select i.inhparent from pg_inherits i - join recurse r on r.relid = i.inhrelid + join hierarchy h on h.relid = i.inhrelid ) -- Return only root tables (those without a parent) select distinct relid as oid - from recurse r + from hierarchy where not exists ( - select 1 from pg_inherits i where i.inhrelid = r.relid + select 1 from pg_inherits i where i.inhrelid = hierarchy.relid ); "#, pub = quote_literal(publication_name) @@ -687,19 +690,30 @@ impl PgReplicationClient { ( format!( "with pub_attrs as ( - select unnest(r.prattrs) + select unnest(r.prattrs) as attnum from pg_publication_rel r - left join pg_publication p on r.prpubid = p.oid + join pg_publication p on r.prpubid = p.oid where p.pubname = {publication} and r.prrelid = {table_id} + ), + -- For partitioned tables, also check if parent is in publication + pub_parent as ( + select 1 as exists_in_pub + from pg_inherits i + join pg_publication_rel r on r.prrelid = i.inhparent + join pg_publication p on p.oid = r.prpubid + where i.inhrelid = {table_id} + and p.pubname = {publication} )", publication = quote_literal(publication), ), "and ( + -- Include column if it's in pub_attrs or if parent table is in publication case (select count(*) from pub_attrs) when 0 then true - else (a.attnum in (select * from pub_attrs)) + else (a.attnum in (select attnum from pub_attrs)) end + or exists(select 1 from pub_parent) )" .to_string(), ) @@ -710,7 +724,7 @@ impl PgReplicationClient { "with pub_table as ( select 1 as exists_in_pub from pg_publication_rel r - left join pg_publication p on r.prpubid = p.oid + join pg_publication p on r.prpubid = p.oid where p.pubname = {publication} and r.prrelid = {table_id} )", diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index a40e69b49..303025987 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -13,7 +13,6 @@ use etl::types::PipelineId; use etl_telemetry::tracing::init_test_tracing; use rand::random; -/// Initial copy for a partitioned table (published via root) copies all existing rows. #[tokio::test(flavor = "multi_thread")] async fn partitioned_table_copy_replicates_existing_data() { init_test_tracing(); @@ -100,7 +99,6 @@ async fn partitioned_table_copy_replicates_existing_data() { ); } -/// Initial copy completes and CDC streams new rows from newly added partitions. #[tokio::test(flavor = "multi_thread")] async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { init_test_tracing(); @@ -204,7 +202,6 @@ async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { assert_eq!(parent_inserts.len(), 1); } -/// Dropping a child partition must not emit DELETE/TRUNCATE events. 
#[tokio::test(flavor = "multi_thread")] async fn partition_drop_does_not_emit_delete_or_truncate() { init_test_tracing(); From 52fd049b30735bc4774e94fa13c2a741883e99ee Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 21 Oct 2025 12:57:02 +0200 Subject: [PATCH 04/26] Improve --- etl/src/replication/client.rs | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index 5d52600d5..556c6dcb6 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -695,25 +695,14 @@ impl PgReplicationClient { join pg_publication p on r.prpubid = p.oid where p.pubname = {publication} and r.prrelid = {table_id} - ), - -- For partitioned tables, also check if parent is in publication - pub_parent as ( - select 1 as exists_in_pub - from pg_inherits i - join pg_publication_rel r on r.prrelid = i.inhparent - join pg_publication p on p.oid = r.prpubid - where i.inhrelid = {table_id} - and p.pubname = {publication} )", publication = quote_literal(publication), ), "and ( - -- Include column if it's in pub_attrs or if parent table is in publication case (select count(*) from pub_attrs) when 0 then true else (a.attnum in (select attnum from pub_attrs)) end - or exists(select 1 from pub_parent) )" .to_string(), ) From 8624c3a867904f65b1d01465f812e2d1a7fb1c28 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 21 Oct 2025 16:17:52 +0200 Subject: [PATCH 05/26] Improve --- etl/src/replication/client.rs | 164 ++++---- etl/tests/pipeline_with_partitioned_table.rs | 415 +++++++++++++++++++ 2 files changed, 503 insertions(+), 76 deletions(-) diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index 556c6dcb6..648a5686a 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -160,6 +160,14 @@ impl PgReplicationSlotTransaction { } } +/// Result of building publication filter SQL components. +struct PublicationFilter { + /// CTEs to include in the WITH clause (empty string if no publication filtering). + ctes: String, + /// Predicate to include in the WHERE clause (empty string if no publication filtering). + predicate: String, +} + /// A client for interacting with Postgres's logical replication features. /// /// This client provides methods for creating replication slots, managing transactions, @@ -674,6 +682,64 @@ impl PgReplicationClient { ); } + /// Builds SQL fragments for filtering columns based on publication settings. 
+ /// + /// Returns CTEs and predicates that filter columns according to: + /// - Postgres 15+: Column-level filtering using `prattrs` + /// - Postgres 14 and earlier: Table-level filtering only + /// - No publication: No filtering (empty strings) + fn build_publication_filter_sql( + &self, + table_id: TableId, + publication_name: Option<&str>, + ) -> PublicationFilter { + let Some(publication_name) = publication_name else { + return PublicationFilter { + ctes: String::new(), + predicate: String::new(), + }; + }; + + // Postgres 15+ supports column-level filtering via prattrs + if let Some(server_version) = self.server_version + && server_version.get() >= 150000 + { + return PublicationFilter { + ctes: format!( + "pub_attrs as ( + select unnest(r.prattrs) as attnum + from pg_publication_rel r + join pg_publication p on r.prpubid = p.oid + where p.pubname = {publication} + and r.prrelid = {table_id} + ),", + publication = quote_literal(publication_name), + ), + predicate: "and ( + case (select count(*) from pub_attrs) + when 0 then true + else (a.attnum in (select attnum from pub_attrs)) + end + )".to_string(), + }; + } + + // Postgres 14 and earlier: table-level filtering only + PublicationFilter { + ctes: format!( + "pub_table as ( + select 1 as exists_in_pub + from pg_publication_rel r + join pg_publication p on r.prpubid = p.oid + where p.pubname = {publication} + and r.prrelid = {table_id} + ),", + publication = quote_literal(publication_name), + ), + predicate: "and (select count(*) from pub_table) > 0".to_string(), + } + } + /// Retrieves schema information for all columns in a table. /// /// If a publication is specified, only columns included in that publication @@ -683,78 +749,21 @@ impl PgReplicationClient { table_id: TableId, publication: Option<&str>, ) -> EtlResult> { - let (pub_cte, pub_pred) = if let Some(publication) = publication { - if let Some(server_version) = self.server_version - && server_version.get() >= 150000 - { - ( - format!( - "with pub_attrs as ( - select unnest(r.prattrs) as attnum - from pg_publication_rel r - join pg_publication p on r.prpubid = p.oid - where p.pubname = {publication} - and r.prrelid = {table_id} - )", - publication = quote_literal(publication), - ), - "and ( - case (select count(*) from pub_attrs) - when 0 then true - else (a.attnum in (select attnum from pub_attrs)) - end - )" - .to_string(), - ) - } else { - // Postgres 14 or earlier or unknown, fallback to no column-level filtering - ( - format!( - "with pub_table as ( - select 1 as exists_in_pub - from pg_publication_rel r - join pg_publication p on r.prpubid = p.oid - where p.pubname = {publication} - and r.prrelid = {table_id} - )", - publication = quote_literal(publication), - ), - format!( - "and ((select count(*) from pub_table) > 0 or exists( - -- Also allow if parent table is in publication (for partitioned tables) - select 1 from pg_inherits i - join pg_publication_rel r on r.prrelid = i.inhparent - join pg_publication p on p.oid = r.prpubid - where i.inhrelid = {table_id} and p.pubname = {publication} - ))", - publication = quote_literal(publication), - ), - ) - } - } else { - ("".to_string(), "".to_string()) - }; - - let has_pub_cte = !pub_cte.is_empty(); - - let cte_prefix = if has_pub_cte { - // If there's already a pub_cte WITH clause, add our CTEs to it with a comma - format!("{pub_cte},") - } else { - // If no pub_cte, start our own WITH clause (no need for RECURSIVE) - "with ".to_string() - }; + // Build publication filter CTEs and predicates based on Postgres version. 
+ let publication_filter = + self.build_publication_filter_sql(table_id, publication); let column_info_query = format!( - "{cte_prefix} - -- Find direct parent of current table (if it's a partition) + r#" + with {publication_ctes} + -- Find the direct parent table (for child partitions) direct_parent as ( select i.inhparent as parent_oid from pg_inherits i - where i.inhrelid = {table_id}::oid + where i.inhrelid = {table_id} limit 1 ), - -- Get parent table's primary key columns + -- Extract primary key column names from the parent table parent_pk_cols as ( select array_agg(a.attname order by x.n) as pk_column_names from pg_constraint con @@ -764,16 +773,18 @@ impl PgReplicationClient { where con.contype = 'p' group by con.conname ) - select a.attname, + select + a.attname, a.atttypid, a.atttypmod, a.attnotnull, case - -- Direct primary key on this relation + -- Check if column has a direct primary key index when coalesce(i.indisprimary, false) = true then true - -- Inherit primary key from parent partitioned table if column name matches + -- Check if column name matches parent's primary key (for partitions) when exists ( - select 1 from parent_pk_cols pk + select 1 + from parent_pk_cols pk where a.attname = any(pk.pk_column_names) ) then true else false @@ -784,16 +795,17 @@ impl PgReplicationClient { and a.attnum = any(i.indkey) and i.indisprimary = true where a.attnum > 0::int2 - and not a.attisdropped - and a.attgenerated = '' - and a.attrelid = {table_id} - {pub_pred} + and not a.attisdropped + and a.attgenerated = '' + and a.attrelid = {table_id} + {publication_predicate} order by a.attnum - ", + "#, + publication_ctes = publication_filter.ctes, + publication_predicate = publication_filter.predicate, ); - + let mut column_schemas = vec![]; - for message in self.client.simple_query(&column_info_query).await? { if let SimpleQueryMessage::Row(row) = message { let name = Self::get_row_value::(&row, "attname", "pg_attribute").await?; diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 303025987..790f07e89 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -13,6 +13,7 @@ use etl::types::PipelineId; use etl_telemetry::tracing::init_test_tracing; use rand::random; +/// The initial copy for a partitioned table (published via root) copies all existing rows. #[tokio::test(flavor = "multi_thread")] async fn partitioned_table_copy_replicates_existing_data() { init_test_tracing(); @@ -99,6 +100,7 @@ async fn partitioned_table_copy_replicates_existing_data() { ); } +/// The initial copy completes and CDC streams new rows from newly added partitions. #[tokio::test(flavor = "multi_thread")] async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { init_test_tracing(); @@ -202,6 +204,7 @@ async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { assert_eq!(parent_inserts.len(), 1); } +/// Dropping a child partition must not emit DELETE/TRUNCATE events. #[tokio::test(flavor = "multi_thread")] async fn partition_drop_does_not_emit_delete_or_truncate() { init_test_tracing(); @@ -298,3 +301,415 @@ async fn partition_drop_does_not_emit_delete_or_truncate() { "Partition drop must not emit TRUNCATE events" ); } + +/// When a partition is detached from a partitioned table with an explicit publication, +/// inserts into the detached partition should NOT be replicated since only the parent +/// table is in the publication. 
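+///
+/// A sketch of the detach step (table and schema names assumed):
+///
+/// ```sql
+/// alter table public.partitioned_events_detach
+///     detach partition public.partitioned_events_detach_p1;
+///
+/// -- Now a standalone table outside the publication: this insert is not decoded.
+/// insert into public.partitioned_events_detach_p1 (data, partition_key)
+///     values ('detached_event', 25);
+/// ```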
+#[tokio::test(flavor = "multi_thread")] +async fn partition_detach_with_explicit_publication_does_not_replicate_detached_inserts() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("partitioned_events_detach"); + let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; + + let (parent_table_id, partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + let p1_table_id = partition_table_ids[0]; + + // Insert initial data into both partitions + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Create explicit publication for parent table only + let publication_name = "test_partitioned_pub_detach".to_string(); + database + .create_publication(&publication_name, std::slice::from_ref(&table_name)) + .await + .expect("Failed to create publication"); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name.clone(), + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + parent_sync_done.notified().await; + + // Verify initial sync copied both rows + let table_rows = destination.get_table_rows().await; + assert_eq!(table_rows.len(), 1); + let parent_rows: usize = table_rows + .get(&parent_table_id) + .map(|rows| rows.len()) + .unwrap_or(0); + assert_eq!( + parent_rows, 2, + "Parent table should have 2 rows from initial COPY" + ); + + // Detach partition p1 from parent + let child_p1_name = format!("{}_{}", table_name.name, "p1"); + let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + database + .run_sql(&format!( + "alter table {} detach partition {}", + table_name.as_quoted_identifier(), + child_p1_qualified + )) + .await + .unwrap(); + + // Insert into the detached partition (should NOT be replicated) + database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('detached_event', 25)", + child_p1_qualified + )) + .await + .unwrap(); + + // Insert into parent table (should be replicated to remaining partition p2) + database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('parent_event', 125)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Wait for the parent table insert to be replicated + let inserts_notify = destination + .wait_for_events_count(vec![(EventType::Insert, 1)]) + .await; + inserts_notify.notified().await; + + let _ = pipeline.shutdown_and_wait().await; + + // Verify events + let events = destination.get_events().await; + let grouped = group_events_by_type_and_table_id(&events); + + // Parent table should have 1 insert event (the insert after detachment) + let parent_inserts = grouped + .get(&(EventType::Insert, parent_table_id)) + .cloned() + .unwrap_or_default(); + assert_eq!( + parent_inserts.len(), + 1, + "Parent table should have exactly 1 CDC insert event" + ); + + // Detached partition should have NO insert events + let detached_inserts = grouped + .get(&(EventType::Insert, p1_table_id)) + .cloned() + 
.unwrap_or_default(); + assert_eq!( + detached_inserts.len(), + 0, + "Detached partition inserts should NOT be replicated" + ); +} + +// TODO: validate. +/// When a partition is detached from a partitioned table with FOR ALL TABLES publication, +/// the detached partition becomes a standalone table. However, the running pipeline won't +/// automatically discover it without re-scanning. This test validates the catalog state. +#[tokio::test(flavor = "multi_thread")] +async fn partition_detach_with_all_tables_publication_catalog_state() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("partitioned_events_all_tables"); + let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; + + let (parent_table_id, partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + let p1_table_id = partition_table_ids[0]; + + // Insert initial data + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Create FOR ALL TABLES publication + let publication_name = "test_all_tables_pub_detach".to_string(); + database + .run_sql(&format!( + "create publication {} for all tables with (publish_via_partition_root = true)", + publication_name + )) + .await + .unwrap(); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name.clone(), + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + parent_sync_done.notified().await; + + // Verify initial state: only parent table is tracked + let table_states_before = state_store.get_table_replication_states().await; + assert!( + table_states_before.contains_key(&parent_table_id), + "Parent table should be tracked before detachment" + ); + assert!( + !table_states_before.contains_key(&p1_table_id), + "Child partition p1 should NOT be tracked separately before detachment" + ); + + // Detach partition p1 + let child_p1_name = format!("{}_{}", table_name.name, "p1"); + let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + database + .run_sql(&format!( + "alter table {} detach partition {}", + table_name.as_quoted_identifier(), + child_p1_qualified + )) + .await + .unwrap(); + + // Verify catalog state: detached partition is now a standalone table + // Check pg_inherits - should no longer have parent relationship + let inherits_check = database + .client + .as_ref() + .unwrap() + .query( + "select count(*) as cnt from pg_inherits where inhrelid = $1", + &[&p1_table_id.0], + ) + .await + .unwrap(); + let inherits_count: i64 = inherits_check[0].get("cnt"); + assert_eq!( + inherits_count, 0, + "Detached partition should have no parent in pg_inherits" + ); + + // Check pg_publication_tables - with FOR ALL TABLES, detached partition should appear + let pub_tables_check = database + .client + .as_ref() + .unwrap() + .query( + "select count(*) as cnt from pg_publication_tables + where pubname = $1 and tablename = $2", + &[&publication_name, &child_p1_name], + ) + .await + .unwrap(); + let 
pub_tables_count: i64 = pub_tables_check[0].get("cnt"); + assert_eq!( + pub_tables_count, 1, + "Detached partition should appear in pg_publication_tables for ALL TABLES publication" + ); + + // Insert into detached partition + database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('detached_event', 25)", + child_p1_qualified + )) + .await + .unwrap(); + + // Note: The running pipeline won't automatically discover the detached partition + // without re-scanning for new tables. This is expected behavior - table discovery + // happens at pipeline start or explicit refresh. + + let _ = pipeline.shutdown_and_wait().await; + + // The pipeline state should still only track the parent table (not the detached partition) + // because it hasn't re-scanned for new tables + let table_states_after = state_store.get_table_replication_states().await; + assert!( + table_states_after.contains_key(&parent_table_id), + "Parent table should still be tracked after detachment" + ); + + // The detached partition insert should NOT be replicated in this pipeline run + // because the pipeline hasn't discovered it as a new table + let events = destination.get_events().await; + let grouped = group_events_by_type_and_table_id(&events); + let detached_inserts = grouped + .get(&(EventType::Insert, p1_table_id)) + .cloned() + .unwrap_or_default(); + assert_eq!( + detached_inserts.len(), + 0, + "Detached partition inserts should NOT be replicated without table re-discovery" + ); +} + + +// TODO: validate. +/// When a partition is detached and then the pipeline restarts (simulating table re-discovery), +/// the detached partition should be discovered as a new standalone table if using FOR ALL TABLES. +#[tokio::test(flavor = "multi_thread")] +async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_table() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("partitioned_events_restart"); + let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; + + let (parent_table_id, partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + let p1_table_id = partition_table_ids[0]; + + // Insert initial data + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Create FOR ALL TABLES publication + let publication_name = "test_all_tables_restart".to_string(); + database + .run_sql(&format!( + "create publication {} for all tables with (publish_via_partition_root = true)", + publication_name + )) + .await + .unwrap(); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + // Start pipeline and wait for initial sync + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name.clone(), + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + parent_sync_done.notified().await; + + // Shutdown the first pipeline + let _ = pipeline.shutdown_and_wait().await; + + // Detach partition p1 + let child_p1_name = format!("{}_{}", table_name.name, "p1"); + let child_p1_qualified = 
format!("{}.{}", table_name.schema, child_p1_name); + database + .run_sql(&format!( + "alter table {} detach partition {}", + table_name.as_quoted_identifier(), + child_p1_qualified + )) + .await + .unwrap(); + + // Insert into detached partition (while pipeline is stopped) + database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('detached_event', 25)", + child_p1_qualified + )) + .await + .unwrap(); + + // Restart the pipeline - it should now discover the detached partition as a new table + let state_store2 = NotifyingStore::new(); + let destination2 = TestDestinationWrapper::wrap(MemoryDestination::new()); + + let detached_sync_done = state_store2 + .notify_on_table_state_type(p1_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id2: PipelineId = random(); + let mut pipeline2 = create_pipeline( + &database.config, + pipeline_id2, + publication_name.clone(), + state_store2.clone(), + destination2.clone(), + ); + + pipeline2.start().await.unwrap(); + + // Wait for detached partition to be synced + detached_sync_done.notified().await; + + let _ = pipeline2.shutdown_and_wait().await; + + // Verify the detached partition was discovered and synced + let table_states = state_store2.get_table_replication_states().await; + assert!( + table_states.contains_key(&p1_table_id), + "Detached partition should be discovered as a standalone table after restart" + ); + + // Verify the data from the detached partition was copied + let table_rows = destination2.get_table_rows().await; + let detached_rows: usize = table_rows + .get(&p1_table_id) + .map(|rows| rows.len()) + .unwrap_or(0); + assert!( + detached_rows > 0, + "Detached partition should have rows synced after pipeline restart" + ); +} From f2378fffe323cf72a598d65842261f259539c8ea Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 21 Oct 2025 16:20:45 +0200 Subject: [PATCH 06/26] Improve --- etl/tests/pipeline_with_partitioned_table.rs | 29 ++++++++++---------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 790f07e89..b2d1acecf 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -13,7 +13,8 @@ use etl::types::PipelineId; use etl_telemetry::tracing::init_test_tracing; use rand::random; -/// The initial copy for a partitioned table (published via root) copies all existing rows. +/// Tests that initial COPY replicates all rows from a partitioned table. +/// Only the parent table is tracked, not individual child partitions. #[tokio::test(flavor = "multi_thread")] async fn partitioned_table_copy_replicates_existing_data() { init_test_tracing(); @@ -100,7 +101,8 @@ async fn partitioned_table_copy_replicates_existing_data() { ); } -/// The initial copy completes and CDC streams new rows from newly added partitions. +/// Tests that CDC streams inserts to partitions created after pipeline startup. +/// New partitions are automatically included without publication changes. #[tokio::test(flavor = "multi_thread")] async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { init_test_tracing(); @@ -204,7 +206,8 @@ async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { assert_eq!(parent_inserts.len(), 1); } -/// Dropping a child partition must not emit DELETE/TRUNCATE events. +/// Tests that detaching and dropping a partition does not emit DELETE or TRUNCATE events. 
+/// Partition management is a DDL operation, not DML, so no data events should be generated. #[tokio::test(flavor = "multi_thread")] async fn partition_drop_does_not_emit_delete_or_truncate() { init_test_tracing(); @@ -302,9 +305,9 @@ async fn partition_drop_does_not_emit_delete_or_truncate() { ); } -/// When a partition is detached from a partitioned table with an explicit publication, -/// inserts into the detached partition should NOT be replicated since only the parent -/// table is in the publication. +/// Tests that detached partitions are not replicated with explicit publications. +/// Once detached, the partition becomes independent and is not in the publication since +/// only the parent table was explicitly added. Inserts to detached partitions are not replicated. #[tokio::test(flavor = "multi_thread")] async fn partition_detach_with_explicit_publication_does_not_replicate_detached_inserts() { init_test_tracing(); @@ -433,10 +436,9 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ ); } -// TODO: validate. -/// When a partition is detached from a partitioned table with FOR ALL TABLES publication, -/// the detached partition becomes a standalone table. However, the running pipeline won't -/// automatically discover it without re-scanning. This test validates the catalog state. +/// Tests catalog state when a partition is detached with FOR ALL TABLES publication. +/// The detached partition appears in pg_publication_tables but is not automatically discovered +/// by the running pipeline. Table discovery only happens at pipeline startup, not during execution. #[tokio::test(flavor = "multi_thread")] async fn partition_detach_with_all_tables_publication_catalog_state() { init_test_tracing(); @@ -588,10 +590,9 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { ); } - -// TODO: validate. -/// When a partition is detached and then the pipeline restarts (simulating table re-discovery), -/// the detached partition should be discovered as a new standalone table if using FOR ALL TABLES. +/// Tests that a detached partition is discovered as a new table after pipeline restart. +/// With FOR ALL TABLES publication, the detached partition is re-discovered during table +/// scanning at startup and its data is replicated. #[tokio::test(flavor = "multi_thread")] async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_table() { init_test_tracing(); From ac65190e6c9d025dd32e452dbd56e3e37b852bbf Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 09:22:47 +0200 Subject: [PATCH 07/26] Improve --- etl/tests/pipeline_with_partitioned_table.rs | 61 ++++++++++---------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index b2d1acecf..cb2e89df1 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -266,7 +266,7 @@ async fn partition_drop_does_not_emit_delete_or_truncate() { .map(|v| v.len()) .unwrap_or(0); - // Detach and drop one child partition (DDL should not generate DML events) + // Detach and drop one child partition (DDL should not generate DML events). 
let child_p1_name = format!("{}_{}", table_name.name, "p1"); let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); database @@ -323,7 +323,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ let p1_table_id = partition_table_ids[0]; - // Insert initial data into both partitions + // Insert initial data into both partitions. database .run_sql(&format!( "insert into {} (data, partition_key) values \ @@ -333,7 +333,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ .await .unwrap(); - // Create explicit publication for parent table only + // Create explicit publication for parent table only. let publication_name = "test_partitioned_pub_detach".to_string(); database .create_publication(&publication_name, std::slice::from_ref(&table_name)) @@ -359,7 +359,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ pipeline.start().await.unwrap(); parent_sync_done.notified().await; - // Verify initial sync copied both rows + // Verify initial sync copied both rows. let table_rows = destination.get_table_rows().await; assert_eq!(table_rows.len(), 1); let parent_rows: usize = table_rows @@ -371,7 +371,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ "Parent table should have 2 rows from initial COPY" ); - // Detach partition p1 from parent + // Detach partition p1 from parent. let child_p1_name = format!("{}_{}", table_name.name, "p1"); let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); database @@ -383,7 +383,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ .await .unwrap(); - // Insert into the detached partition (should NOT be replicated) + // Insert into the detached partition (should NOT be replicated). database .run_sql(&format!( "insert into {} (data, partition_key) values ('detached_event', 25)", @@ -392,7 +392,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ .await .unwrap(); - // Insert into parent table (should be replicated to remaining partition p2) + // Insert into the parent table (should be replicated to remaining partition p2). database .run_sql(&format!( "insert into {} (data, partition_key) values ('parent_event', 125)", @@ -401,7 +401,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ .await .unwrap(); - // Wait for the parent table insert to be replicated + // Wait for the parent table insert to be replicated. let inserts_notify = destination .wait_for_events_count(vec![(EventType::Insert, 1)]) .await; @@ -413,7 +413,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ let events = destination.get_events().await; let grouped = group_events_by_type_and_table_id(&events); - // Parent table should have 1 insert event (the insert after detachment) + // Parent table should have 1 insert event (the insert after detachment). let parent_inserts = grouped .get(&(EventType::Insert, parent_table_id)) .cloned() @@ -424,7 +424,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ "Parent table should have exactly 1 CDC insert event" ); - // Detached partition should have NO insert events + // Detached partition should have NO insert events. 
let detached_inserts = grouped .get(&(EventType::Insert, p1_table_id)) .cloned() @@ -454,7 +454,7 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { let p1_table_id = partition_table_ids[0]; - // Insert initial data + // Insert initial data. database .run_sql(&format!( "insert into {} (data, partition_key) values \ @@ -464,7 +464,7 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { .await .unwrap(); - // Create FOR ALL TABLES publication + // Create FOR ALL TABLES publication. let publication_name = "test_all_tables_pub_detach".to_string(); database .run_sql(&format!( @@ -493,7 +493,7 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { pipeline.start().await.unwrap(); parent_sync_done.notified().await; - // Verify initial state: only parent table is tracked + // Verify the initial state. The parent table is the only table tracked. let table_states_before = state_store.get_table_replication_states().await; assert!( table_states_before.contains_key(&parent_table_id), @@ -504,7 +504,7 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { "Child partition p1 should NOT be tracked separately before detachment" ); - // Detach partition p1 + // Detach partition p1. let child_p1_name = format!("{}_{}", table_name.name, "p1"); let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); database @@ -516,8 +516,7 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { .await .unwrap(); - // Verify catalog state: detached partition is now a standalone table - // Check pg_inherits - should no longer have parent relationship + // Verify catalog state. The detached partition is now a standalone table. let inherits_check = database .client .as_ref() @@ -534,7 +533,7 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { "Detached partition should have no parent in pg_inherits" ); - // Check pg_publication_tables - with FOR ALL TABLES, detached partition should appear + // Check pg_publication_tables. With FOR ALL TABLES, the detached partition should appear. let pub_tables_check = database .client .as_ref() @@ -552,7 +551,7 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { "Detached partition should appear in pg_publication_tables for ALL TABLES publication" ); - // Insert into detached partition + // Insert into detached partition. database .run_sql(&format!( "insert into {} (data, partition_key) values ('detached_event', 25)", @@ -562,13 +561,13 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { .unwrap(); // Note: The running pipeline won't automatically discover the detached partition - // without re-scanning for new tables. This is expected behavior - table discovery + // without re-scanning for new tables. This is expected behavior, the table discovery // happens at pipeline start or explicit refresh. let _ = pipeline.shutdown_and_wait().await; // The pipeline state should still only track the parent table (not the detached partition) - // because it hasn't re-scanned for new tables + // because it hasn't re-scanned for new tables. 
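The comment above contrasts two kinds of state: the Postgres catalogs update immediately on DETACH, while the pipeline's tracked table set is fixed at startup. The catalog side can be inspected directly, for example with the publication from this test:

```sql
-- Under FOR ALL TABLES the detached partition shows up right away as a
-- standalone entry, next to the parent that is still published via root.
select schemaname, tablename
from pg_publication_tables
where pubname = 'test_all_tables_pub_detach'
order by tablename;
```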
let table_states_after = state_store.get_table_replication_states().await; assert!( table_states_after.contains_key(&parent_table_id), @@ -576,7 +575,7 @@ async fn partition_detach_with_all_tables_publication_catalog_state() { ); // The detached partition insert should NOT be replicated in this pipeline run - // because the pipeline hasn't discovered it as a new table + // because the pipeline hasn't discovered it as a new table. let events = destination.get_events().await; let grouped = group_events_by_type_and_table_id(&events); let detached_inserts = grouped @@ -608,7 +607,7 @@ async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_tab let p1_table_id = partition_table_ids[0]; - // Insert initial data + // Insert initial data. database .run_sql(&format!( "insert into {} (data, partition_key) values \ @@ -618,7 +617,7 @@ async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_tab .await .unwrap(); - // Create FOR ALL TABLES publication + // Create FOR ALL TABLES publication. let publication_name = "test_all_tables_restart".to_string(); database .run_sql(&format!( @@ -631,7 +630,7 @@ async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_tab let state_store = NotifyingStore::new(); let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); - // Start pipeline and wait for initial sync + // Start pipeline and wait for initial sync. let parent_sync_done = state_store .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) .await; @@ -648,10 +647,10 @@ async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_tab pipeline.start().await.unwrap(); parent_sync_done.notified().await; - // Shutdown the first pipeline + // Shutdown the first pipeline. let _ = pipeline.shutdown_and_wait().await; - // Detach partition p1 + // Detach partition p1. let child_p1_name = format!("{}_{}", table_name.name, "p1"); let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); database @@ -663,7 +662,7 @@ async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_tab .await .unwrap(); - // Insert into detached partition (while pipeline is stopped) + // Insert into detached partition (while pipeline is stopped). database .run_sql(&format!( "insert into {} (data, partition_key) values ('detached_event', 25)", @@ -672,7 +671,7 @@ async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_tab .await .unwrap(); - // Restart the pipeline - it should now discover the detached partition as a new table + // Restart the pipeline. It should now discover the detached partition as a new table. let state_store2 = NotifyingStore::new(); let destination2 = TestDestinationWrapper::wrap(MemoryDestination::new()); @@ -691,19 +690,19 @@ async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_tab pipeline2.start().await.unwrap(); - // Wait for detached partition to be synced + // Wait for the detached partition to be synced. detached_sync_done.notified().await; let _ = pipeline2.shutdown_and_wait().await; - // Verify the detached partition was discovered and synced + // Verify the detached partition was discovered and synced. 
let table_states = state_store2.get_table_replication_states().await; assert!( table_states.contains_key(&p1_table_id), "Detached partition should be discovered as a standalone table after restart" ); - // Verify the data from the detached partition was copied + // Verify the data from the detached partition was copied. let table_rows = destination2.get_table_rows().await; let detached_rows: usize = table_rows .get(&p1_table_id) From 8d553c38b9cb73b2ab399ab67299073501993f4e Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 10:35:56 +0200 Subject: [PATCH 08/26] Improve --- etl/src/replication/client.rs | 8 +-- etl/tests/pipeline_with_partitioned_table.rs | 51 +++++++++++++------- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index 648a5686a..bf594a4fc 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -720,7 +720,8 @@ impl PgReplicationClient { when 0 then true else (a.attnum in (select attnum from pub_attrs)) end - )".to_string(), + )" + .to_string(), }; } @@ -750,8 +751,7 @@ impl PgReplicationClient { publication: Option<&str>, ) -> EtlResult> { // Build publication filter CTEs and predicates based on Postgres version. - let publication_filter = - self.build_publication_filter_sql(table_id, publication); + let publication_filter = self.build_publication_filter_sql(table_id, publication); let column_info_query = format!( r#" @@ -804,7 +804,7 @@ impl PgReplicationClient { publication_ctes = publication_filter.ctes, publication_predicate = publication_filter.predicate, ); - + let mut column_schemas = vec![]; for message in self.client.simple_query(&column_info_query).await? { if let SimpleQueryMessage::Row(row) = message { diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index cb2e89df1..fa572dcac 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -647,8 +647,16 @@ async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_tab pipeline.start().await.unwrap(); parent_sync_done.notified().await; - // Shutdown the first pipeline. - let _ = pipeline.shutdown_and_wait().await; + // Verify the initial state. The parent table is the only table tracked. + let table_states_before = state_store.get_table_replication_states().await; + assert!( + table_states_before.contains_key(&parent_table_id), + "Parent table should be tracked before detachment" + ); + assert!( + !table_states_before.contains_key(&p1_table_id), + "Child partition p1 should NOT be tracked separately before detachment" + ); // Detach partition p1. let child_p1_name = format!("{}_{}", table_name.name, "p1"); @@ -671,45 +679,52 @@ async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_tab .await .unwrap(); - // Restart the pipeline. It should now discover the detached partition as a new table. - let state_store2 = NotifyingStore::new(); - let destination2 = TestDestinationWrapper::wrap(MemoryDestination::new()); + // Shutdown the pipeline. + let _ = pipeline.shutdown_and_wait().await; - let detached_sync_done = state_store2 + // Restart the pipeline. It should now discover the detached partition as a new table. 
+    let detached_sync_done = state_store
         .notify_on_table_state_type(p1_table_id, TableReplicationPhaseType::SyncDone)
         .await;

-    let pipeline_id2: PipelineId = random();
-    let mut pipeline2 = create_pipeline(
+    let mut pipeline = create_pipeline(
         &database.config,
-        pipeline_id2,
+        pipeline_id,
         publication_name.clone(),
-        state_store2.clone(),
-        destination2.clone(),
+        state_store.clone(),
+        destination.clone(),
     );

-    pipeline2.start().await.unwrap();
+    pipeline.start().await.unwrap();

     // Wait for the detached partition to be synced.
     detached_sync_done.notified().await;

-    let _ = pipeline2.shutdown_and_wait().await;
+    let _ = pipeline.shutdown_and_wait().await;

     // Verify the detached partition was discovered and synced.
-    let table_states = state_store2.get_table_replication_states().await;
+    let table_states_after = state_store.get_table_replication_states().await;
     assert!(
-        table_states.contains_key(&p1_table_id),
+        table_states_after.contains_key(&p1_table_id),
         "Detached partition should be discovered as a standalone table after restart"
     );

     // Verify the data from the detached partition was copied.
-    let table_rows = destination2.get_table_rows().await;
+    let table_rows = destination.get_table_rows().await;
+    let parent_rows: usize = table_rows
+        .get(&parent_table_id)
+        .map(|rows| rows.len())
+        .unwrap_or(0);
+    assert_eq!(
+        parent_rows, 2,
+        "The parent table should have the initial rows"
+    );
     let detached_rows: usize = table_rows
         .get(&p1_table_id)
         .map(|rows| rows.len())
         .unwrap_or(0);
-    assert!(
-        detached_rows > 0,
+    assert_eq!(
+        detached_rows, 2,
         "Detached partition should have rows synced after pipeline restart"
     );
 }
From 319273b63e8939716770bdc94c57ddab1d387d9f Mon Sep 17 00:00:00 2001
From: Riccardo Busetti
Date: Fri, 24 Oct 2025 10:43:32 +0200
Subject: [PATCH 09/26] Improve

---
 etl/tests/pipeline_with_partitioned_table.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs
index fa572dcac..620476b1c 100644
--- a/etl/tests/pipeline_with_partitioned_table.rs
+++ b/etl/tests/pipeline_with_partitioned_table.rs
@@ -440,7 +440,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_
 /// The detached partition appears in pg_publication_tables but is not automatically discovered
 /// by the running pipeline. Table discovery only happens at pipeline startup, not during execution.
 #[tokio::test(flavor = "multi_thread")]
-async fn partition_detach_with_all_tables_publication_catalog_state() {
+async fn partition_detach_with_all_tables_publication_does_not_replicate_detached_inserts() {
     init_test_tracing();
     let database = spawn_source_database().await;

@@ -593,7 +593,7 @@ async fn partition_detach_with_all_tables_publication_catalog_state()
 /// With FOR ALL TABLES publication, the detached partition is re-discovered during table
 /// scanning at startup and its data is replicated.
#[tokio::test(flavor = "multi_thread")] -async fn partition_detach_with_all_tables_and_pipeline_restart_discovers_new_table() { +async fn partition_detach_with_all_tables_publication_does_replicate_detached_inserts_on_restart() { init_test_tracing(); let database = spawn_source_database().await; From 29f1f41cc401b55059f177b4ce7f2ca26026e353 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 11:29:06 +0200 Subject: [PATCH 10/26] Improve --- etl/src/replication/client.rs | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index bf594a4fc..da36cc10d 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -642,6 +642,7 @@ impl PgReplicationClient { ) -> EtlResult { let table_name = self.get_table_name(table_id).await?; let column_schemas = self.get_column_schemas(table_id, publication).await?; + warn!("COLUMNS SCHEMAS FOR TABLE {:?}: {:?}", table_name, column_schemas); Ok(TableSchema { name: table_name, @@ -706,20 +707,27 @@ impl PgReplicationClient { { return PublicationFilter { ctes: format!( - "pub_attrs as ( - select unnest(r.prattrs) as attnum - from pg_publication_rel r - join pg_publication p on r.prpubid = p.oid + "pub_info as ( + select p.puballtables, r.prattrs + from pg_publication p + left join pg_publication_rel r on r.prpubid = p.oid and r.prrelid = {table_id} where p.pubname = {publication} - and r.prrelid = {table_id} + ), + pub_attrs as ( + select unnest(prattrs) as attnum + from pub_info + where prattrs is not null ),", publication = quote_literal(publication_name), ), predicate: "and ( - case (select count(*) from pub_attrs) - when 0 then true - else (a.attnum in (select attnum from pub_attrs)) - end + (select puballtables from pub_info) = true + or ( + case (select count(*) from pub_attrs) + when 0 then true + else (a.attnum in (select attnum from pub_attrs)) + end + ) )" .to_string(), }; @@ -728,7 +736,12 @@ impl PgReplicationClient { // Postgres 14 and earlier: table-level filtering only PublicationFilter { ctes: format!( - "pub_table as ( + "pub_info as ( + select p.puballtables + from pg_publication p + where p.pubname = {publication} + ), + pub_table as ( select 1 as exists_in_pub from pg_publication_rel r join pg_publication p on r.prpubid = p.oid @@ -737,7 +750,7 @@ impl PgReplicationClient { ),", publication = quote_literal(publication_name), ), - predicate: "and (select count(*) from pub_table) > 0".to_string(), + predicate: "and ((select puballtables from pub_info) = true or (select count(*) from pub_table) > 0)".to_string(), } } From 9a4d1fc0d96c0dd08bc675f2bbb434c3381051fc Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 13:26:14 +0200 Subject: [PATCH 11/26] Improve --- etl-postgres/src/tokio/test_utils.rs | 6 +- etl/src/replication/client.rs | 13 +- etl/tests/pipeline_with_partitioned_table.rs | 414 ++++++++++++++++++- 3 files changed, 419 insertions(+), 14 deletions(-) diff --git a/etl-postgres/src/tokio/test_utils.rs b/etl-postgres/src/tokio/test_utils.rs index 7aad2b62b..8a0d52601 100644 --- a/etl-postgres/src/tokio/test_utils.rs +++ b/etl-postgres/src/tokio/test_utils.rs @@ -87,9 +87,9 @@ impl PgDatabase { // PostgreSQL 15+ supports FOR ALL TABLES IN SCHEMA syntax let create_publication_query = match schema { Some(schema_name) => format!( - "create publication {publication_name} for tables in schema {schema_name}" + "create publication {publication_name} for tables in schema 
{schema_name} with (publish_via_partition_root = true)" ), - None => format!("create publication {publication_name} for all tables"), + None => format!("create publication {publication_name} for all tables with (publish_via_partition_root = true)"), }; client.execute(&create_publication_query, &[]).await?; @@ -116,7 +116,7 @@ impl PgDatabase { } None => { let create_publication_query = - format!("create publication {publication_name} for all tables"); + format!("create publication {publication_name} for all tables with (publish_via_partition_root = true)"); client.execute(&create_publication_query, &[]).await?; } } diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index da36cc10d..5a03e0d6c 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -434,7 +434,7 @@ impl PgReplicationClient { union all - -- Get tables from pg_publication_tables (for ALL TABLES publications) + -- Get tables from pg_publication_tables (for ALL TABLES and FOR TABLES IN SCHEMA) -- Only executes if pg_publication_rel is empty for this publication select c.oid from pg_publication_tables pt @@ -642,7 +642,6 @@ impl PgReplicationClient { ) -> EtlResult { let table_name = self.get_table_name(table_id).await?; let column_schemas = self.get_column_schemas(table_id, publication).await?; - warn!("COLUMNS SCHEMAS FOR TABLE {:?}: {:?}", table_name, column_schemas); Ok(TableSchema { name: table_name, @@ -708,7 +707,7 @@ impl PgReplicationClient { return PublicationFilter { ctes: format!( "pub_info as ( - select p.puballtables, r.prattrs + select p.oid as puboid, p.puballtables, r.prattrs from pg_publication p left join pg_publication_rel r on r.prpubid = p.oid and r.prrelid = {table_id} where p.pubname = {publication} @@ -717,11 +716,19 @@ impl PgReplicationClient { select unnest(prattrs) as attnum from pub_info where prattrs is not null + ), + pub_schema as ( + select 1 as exists_in_schema_pub + from pub_info + join pg_publication_namespace pn on pn.pnpubid = pub_info.puboid + join pg_class c on c.relnamespace = pn.pnnspid + where c.oid = {table_id} ),", publication = quote_literal(publication_name), ), predicate: "and ( (select puballtables from pub_info) = true + or (select count(*) from pub_schema) > 0 or ( case (select count(*) from pub_attrs) when 0 then true diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 620476b1c..d1ab3c5f0 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -467,10 +467,7 @@ async fn partition_detach_with_all_tables_publication_does_not_replicate_detache // Create FOR ALL TABLES publication. let publication_name = "test_all_tables_pub_detach".to_string(); database - .run_sql(&format!( - "create publication {} for all tables with (publish_via_partition_root = true)", - publication_name - )) + .create_publication_for_all(&publication_name, None) .await .unwrap(); @@ -620,10 +617,7 @@ async fn partition_detach_with_all_tables_publication_does_replicate_detached_in // Create FOR ALL TABLES publication. 
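The pub_schema CTE added above covers the third membership mechanism: FOR TABLES IN SCHEMA publications (PostgreSQL 15+) are recorded in pg_publication_namespace, so neither puballtables nor pg_publication_rel.prattrs can see them. A sketch of the catalog relationship the CTE walks:

```sql
-- A table is covered by a schema-level publication when its namespace
-- appears in pg_publication_namespace (PostgreSQL 15+).
select p.pubname, n.nspname as published_schema
from pg_publication_namespace pn
join pg_publication p on p.oid = pn.pnpubid
join pg_namespace n on n.oid = pn.pnnspid;
```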
let publication_name = "test_all_tables_restart".to_string(); database - .run_sql(&format!( - "create publication {} for all tables with (publish_via_partition_root = true)", - publication_name - )) + .create_publication_for_all(&publication_name, None) .await .unwrap(); @@ -728,3 +722,407 @@ async fn partition_detach_with_all_tables_publication_does_replicate_detached_in "Detached partition should have rows synced after pipeline restart" ); } + +/// Tests that partitioned tables replicate correctly with FOR TABLES IN SCHEMA publication. +/// The parent table in the schema should be tracked and all data should be replicated. +/// Requires PostgreSQL 15+ for FOR TABLES IN SCHEMA support. +#[tokio::test(flavor = "multi_thread")] +async fn partitioned_table_with_schema_publication_replicates_data() { + init_test_tracing(); + let database = spawn_source_database().await; + + // Skip test if PostgreSQL version is < 15 (FOR TABLES IN SCHEMA requires 15+). + if let Some(version) = database.server_version() { + if version.get() < 150000 { + eprintln!("Skipping test: PostgreSQL 15+ required for FOR TABLES IN SCHEMA"); + return; + } + } + + let table_name = test_table_name("partitioned_events_schema"); + let partition_specs = [ + ("p1", "from (1) to (100)"), + ("p2", "from (100) to (200)"), + ("p3", "from (200) to (300)"), + ]; + + let (parent_table_id, _partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150), ('event3', 250)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Create FOR TABLES IN SCHEMA publication. + let publication_name = "test_schema_pub".to_string(); + database + .create_publication_for_all(&publication_name, Some(&table_name.schema)) + .await + .expect("Failed to create publication"); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name, + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + parent_sync_done.notified().await; + + let _ = pipeline.shutdown_and_wait().await; + + let table_rows = destination.get_table_rows().await; + let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum(); + + assert_eq!( + total_rows, 3, + "Expected 3 rows synced from schema publication, but got {total_rows}" + ); + + let table_states = state_store.get_table_replication_states().await; + + assert!( + table_states.contains_key(&parent_table_id), + "Parent table should be tracked in state" + ); + assert_eq!( + table_states.len(), + 1, + "Only the parent table should be tracked in state" + ); + + let parent_table_rows = table_rows + .iter() + .filter(|(table_id, _)| **table_id == parent_table_id) + .map(|(_, rows)| rows.len()) + .sum::(); + assert_eq!( + parent_table_rows, 3, + "Parent table should contain all rows from schema publication" + ); +} + +/// Tests that detached partitions are not automatically discovered with FOR TABLES IN SCHEMA publication. 
+/// Similar to FOR ALL TABLES, the detached partition appears in pg_publication_tables but is not +/// automatically discovered by the running pipeline without restart. +/// Requires PostgreSQL 15+ for FOR TABLES IN SCHEMA support. +#[tokio::test(flavor = "multi_thread")] +async fn partition_detach_with_schema_publication_does_not_replicate_detached_inserts() { + init_test_tracing(); + let database = spawn_source_database().await; + + // Skip test if PostgreSQL version is < 15 (FOR TABLES IN SCHEMA requires 15+). + if let Some(version) = database.server_version() { + if version.get() < 150000 { + eprintln!("Skipping test: PostgreSQL 15+ required for FOR TABLES IN SCHEMA"); + return; + } + } + + let table_name = test_table_name("partitioned_events_schema_detach"); + let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; + + let (parent_table_id, partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + let p1_table_id = partition_table_ids[0]; + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Create FOR TABLES IN SCHEMA publication. + let publication_name = "test_schema_pub_detach".to_string(); + database + .create_publication_for_all(&publication_name, Some(&table_name.schema)) + .await + .unwrap(); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name.clone(), + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + parent_sync_done.notified().await; + + // Verify initial state. + let table_states_before = state_store.get_table_replication_states().await; + assert!( + table_states_before.contains_key(&parent_table_id), + "Parent table should be tracked before detachment" + ); + assert!( + !table_states_before.contains_key(&p1_table_id), + "Child partition p1 should NOT be tracked separately before detachment" + ); + + // Detach partition p1. + let child_p1_name = format!("{}_{}", table_name.name, "p1"); + let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + database + .run_sql(&format!( + "alter table {} detach partition {}", + table_name.as_quoted_identifier(), + child_p1_qualified + )) + .await + .unwrap(); + + // Verify catalog state. The detached partition should appear in pg_publication_tables. + let pub_tables_check = database + .client + .as_ref() + .unwrap() + .query( + "select count(*) as cnt from pg_publication_tables + where pubname = $1 and tablename = $2", + &[&publication_name, &child_p1_name], + ) + .await + .unwrap(); + let pub_tables_count: i64 = pub_tables_check[0].get("cnt"); + assert_eq!( + pub_tables_count, 1, + "Detached partition should appear in pg_publication_tables for TABLES IN SCHEMA publication" + ); + + // Insert into detached partition. + database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('detached_event', 25)", + child_p1_qualified + )) + .await + .unwrap(); + + // Insert into parent table (should be replicated). 
+ database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('parent_event', 125)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Wait for the parent table insert to be replicated. + let inserts_notify = destination + .wait_for_events_count(vec![(EventType::Insert, 1)]) + .await; + inserts_notify.notified().await; + + let _ = pipeline.shutdown_and_wait().await; + + // The pipeline state should still only track the parent table. + let table_states_after = state_store.get_table_replication_states().await; + assert!( + table_states_after.contains_key(&parent_table_id), + "Parent table should still be tracked after detachment" + ); + + // Verify events. + let events = destination.get_events().await; + let grouped = group_events_by_type_and_table_id(&events); + + // Parent table should have 1 insert event. + let parent_inserts = grouped + .get(&(EventType::Insert, parent_table_id)) + .cloned() + .unwrap_or_default(); + assert_eq!( + parent_inserts.len(), + 1, + "Parent table should have exactly 1 CDC insert event" + ); + + // Detached partition inserts should NOT be replicated without table re-discovery. + let detached_inserts = grouped + .get(&(EventType::Insert, p1_table_id)) + .cloned() + .unwrap_or_default(); + assert_eq!( + detached_inserts.len(), + 0, + "Detached partition inserts should NOT be replicated without table re-discovery" + ); +} + +/// Tests that a detached partition is discovered as a new table after pipeline restart +/// with FOR TABLES IN SCHEMA publication. After restart, the detached partition in the same +/// schema should be discovered and its data replicated. +/// Requires PostgreSQL 15+ for FOR TABLES IN SCHEMA support. +#[tokio::test(flavor = "multi_thread")] +async fn partition_detach_with_schema_publication_does_replicate_detached_inserts_on_restart() { + init_test_tracing(); + let database = spawn_source_database().await; + + // Skip test if PostgreSQL version is < 15 (FOR TABLES IN SCHEMA requires 15+). + if let Some(version) = database.server_version() { + if version.get() < 150000 { + eprintln!("Skipping test: PostgreSQL 15+ required for FOR TABLES IN SCHEMA"); + return; + } + } + + let table_name = test_table_name("partitioned_events_schema_restart"); + let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; + + let (parent_table_id, partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + let p1_table_id = partition_table_ids[0]; + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Create FOR TABLES IN SCHEMA publication. + let publication_name = "test_schema_pub_restart".to_string(); + database + .create_publication_for_all(&publication_name, Some(&table_name.schema)) + .await + .unwrap(); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + // Start pipeline and wait for initial sync. 
+ let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name.clone(), + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + parent_sync_done.notified().await; + + // Verify initial state. + let table_states_before = state_store.get_table_replication_states().await; + assert!( + table_states_before.contains_key(&parent_table_id), + "Parent table should be tracked before detachment" + ); + assert!( + !table_states_before.contains_key(&p1_table_id), + "Child partition p1 should NOT be tracked separately before detachment" + ); + + // Detach partition p1. + let child_p1_name = format!("{}_{}", table_name.name, "p1"); + let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + database + .run_sql(&format!( + "alter table {} detach partition {}", + table_name.as_quoted_identifier(), + child_p1_qualified + )) + .await + .unwrap(); + + // Insert into detached partition (while pipeline is still running). + database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('detached_event', 25)", + child_p1_qualified + )) + .await + .unwrap(); + + // Shutdown the pipeline. + let _ = pipeline.shutdown_and_wait().await; + + // Restart the pipeline. It should now discover the detached partition as a new table. + let detached_sync_done = state_store + .notify_on_table_state_type(p1_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name.clone(), + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + + // Wait for the detached partition to be synced. + detached_sync_done.notified().await; + + let _ = pipeline.shutdown_and_wait().await; + + // Verify the detached partition was discovered and synced. + let table_states_after = state_store.get_table_replication_states().await; + assert!( + table_states_after.contains_key(&p1_table_id), + "Detached partition should be discovered as a standalone table after restart" + ); + + // Verify the data from the detached partition was copied. + let table_rows = destination.get_table_rows().await; + let parent_rows: usize = table_rows + .get(&parent_table_id) + .map(|rows| rows.len()) + .unwrap_or(0); + assert_eq!( + parent_rows, 2, + "Parent table should have the initial 2 rows from first pipeline run" + ); + let detached_rows: usize = table_rows + .get(&p1_table_id) + .map(|rows| rows.len()) + .unwrap_or(0); + assert_eq!( + detached_rows, 2, + "Detached partition should have 2 rows synced after pipeline restart (1 from initial data + 1 inserted)" + ); +} From bbfdf95931f5c5ba99149eef17a0791fdcc74427 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 13:28:20 +0200 Subject: [PATCH 12/26] Improve --- etl/tests/pipeline_with_partitioned_table.rs | 96 -------------------- 1 file changed, 96 deletions(-) diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index d1ab3c5f0..9935d8853 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -723,102 +723,6 @@ async fn partition_detach_with_all_tables_publication_does_replicate_detached_in ); } -/// Tests that partitioned tables replicate correctly with FOR TABLES IN SCHEMA publication. 
-/// The parent table in the schema should be tracked and all data should be replicated. -/// Requires PostgreSQL 15+ for FOR TABLES IN SCHEMA support. -#[tokio::test(flavor = "multi_thread")] -async fn partitioned_table_with_schema_publication_replicates_data() { - init_test_tracing(); - let database = spawn_source_database().await; - - // Skip test if PostgreSQL version is < 15 (FOR TABLES IN SCHEMA requires 15+). - if let Some(version) = database.server_version() { - if version.get() < 150000 { - eprintln!("Skipping test: PostgreSQL 15+ required for FOR TABLES IN SCHEMA"); - return; - } - } - - let table_name = test_table_name("partitioned_events_schema"); - let partition_specs = [ - ("p1", "from (1) to (100)"), - ("p2", "from (100) to (200)"), - ("p3", "from (200) to (300)"), - ]; - - let (parent_table_id, _partition_table_ids) = - create_partitioned_table(&database, table_name.clone(), &partition_specs) - .await - .expect("Failed to create partitioned table"); - - database - .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150), ('event3', 250)", - table_name.as_quoted_identifier() - )) - .await - .unwrap(); - - // Create FOR TABLES IN SCHEMA publication. - let publication_name = "test_schema_pub".to_string(); - database - .create_publication_for_all(&publication_name, Some(&table_name.schema)) - .await - .expect("Failed to create publication"); - - let state_store = NotifyingStore::new(); - let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); - - let parent_sync_done = state_store - .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) - .await; - - let pipeline_id: PipelineId = random(); - let mut pipeline = create_pipeline( - &database.config, - pipeline_id, - publication_name, - state_store.clone(), - destination.clone(), - ); - - pipeline.start().await.unwrap(); - parent_sync_done.notified().await; - - let _ = pipeline.shutdown_and_wait().await; - - let table_rows = destination.get_table_rows().await; - let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum(); - - assert_eq!( - total_rows, 3, - "Expected 3 rows synced from schema publication, but got {total_rows}" - ); - - let table_states = state_store.get_table_replication_states().await; - - assert!( - table_states.contains_key(&parent_table_id), - "Parent table should be tracked in state" - ); - assert_eq!( - table_states.len(), - 1, - "Only the parent table should be tracked in state" - ); - - let parent_table_rows = table_rows - .iter() - .filter(|(table_id, _)| **table_id == parent_table_id) - .map(|(_, rows)| rows.len()) - .sum::(); - assert_eq!( - parent_table_rows, 3, - "Parent table should contain all rows from schema publication" - ); -} - /// Tests that detached partitions are not automatically discovered with FOR TABLES IN SCHEMA publication. /// Similar to FOR ALL TABLES, the detached partition appears in pg_publication_tables but is not /// automatically discovered by the running pipeline without restart. 
From 65f0b52b13ba0c434cf2e56ffeec8edba1a28bb2 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 13:31:54 +0200 Subject: [PATCH 13/26] Improve --- etl-postgres/src/tokio/test_utils.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/etl-postgres/src/tokio/test_utils.rs b/etl-postgres/src/tokio/test_utils.rs index 8a0d52601..d452c3564 100644 --- a/etl-postgres/src/tokio/test_utils.rs +++ b/etl-postgres/src/tokio/test_utils.rs @@ -89,7 +89,9 @@ impl PgDatabase { Some(schema_name) => format!( "create publication {publication_name} for tables in schema {schema_name} with (publish_via_partition_root = true)" ), - None => format!("create publication {publication_name} for all tables with (publish_via_partition_root = true)"), + None => format!( + "create publication {publication_name} for all tables with (publish_via_partition_root = true)" + ), }; client.execute(&create_publication_query, &[]).await?; @@ -115,8 +117,9 @@ impl PgDatabase { } } None => { - let create_publication_query = - format!("create publication {publication_name} for all tables with (publish_via_partition_root = true)"); + let create_publication_query = format!( + "create publication {publication_name} for all tables with (publish_via_partition_root = true)" + ); client.execute(&create_publication_query, &[]).await?; } } From 12416f44e5012145ab4828913247a1e4cdcc80c6 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 13:37:00 +0200 Subject: [PATCH 14/26] Improve --- etl/tests/pipeline_with_partitioned_table.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 9935d8853..3b9ec3313 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -386,8 +386,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ // Insert into the detached partition (should NOT be replicated). database .run_sql(&format!( - "insert into {} (data, partition_key) values ('detached_event', 25)", - child_p1_qualified + "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)" )) .await .unwrap(); @@ -551,8 +550,7 @@ async fn partition_detach_with_all_tables_publication_does_not_replicate_detache // Insert into detached partition. database .run_sql(&format!( - "insert into {} (data, partition_key) values ('detached_event', 25)", - child_p1_qualified + "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)" )) .await .unwrap(); @@ -667,8 +665,7 @@ async fn partition_detach_with_all_tables_publication_does_replicate_detached_in // Insert into detached partition (while pipeline is stopped). database .run_sql(&format!( - "insert into {} (data, partition_key) values ('detached_event', 25)", - child_p1_qualified + "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)" )) .await .unwrap(); @@ -829,8 +826,7 @@ async fn partition_detach_with_schema_publication_does_not_replicate_detached_in // Insert into detached partition. 
database .run_sql(&format!( - "insert into {} (data, partition_key) values ('detached_event', 25)", - child_p1_qualified + "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)" )) .await .unwrap(); @@ -975,8 +971,7 @@ async fn partition_detach_with_schema_publication_does_replicate_detached_insert // Insert into detached partition (while pipeline is still running). database .run_sql(&format!( - "insert into {} (data, partition_key) values ('detached_event', 25)", - child_p1_qualified + "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)" )) .await .unwrap(); From 2127af1f318ff03a6a51893fdccb0d5be6bfebd4 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 15:48:32 +0200 Subject: [PATCH 15/26] Improve --- etl/src/test_utils/event.rs | 19 +- etl/tests/pipeline_with_partitioned_table.rs | 181 +++++++++++++++++-- 2 files changed, 181 insertions(+), 19 deletions(-) diff --git a/etl/src/test_utils/event.rs b/etl/src/test_utils/event.rs index d05615f5f..b30361877 100644 --- a/etl/src/test_utils/event.rs +++ b/etl/src/test_utils/event.rs @@ -22,15 +22,20 @@ pub fn group_events_by_type_and_table_id( for event in events { let event_type = EventType::from(event); // This grouping only works on simple DML operations. - let table_id = match event { - Event::Insert(event) => Some(event.table_id), - Event::Update(event) => Some(event.table_id), - Event::Delete(event) => Some(event.table_id), - _ => None, + let table_ids = match event { + Event::Insert(event) => vec![event.table_id], + Event::Update(event) => vec![event.table_id], + Event::Delete(event) => vec![event.table_id], + Event::Truncate(event) => event + .rel_ids + .iter() + .map(|rel_id| TableId::new(*rel_id)) + .collect(), + _ => vec![], }; - if let Some(table_id) = table_id { + for table_id in table_ids { grouped - .entry((event_type, table_id)) + .entry((event_type.clone(), table_id)) .or_insert_with(Vec::new) .push(event.clone()); } diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 3b9ec3313..5974b489c 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -163,6 +163,11 @@ async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { .await .unwrap(); + // Wait for CDC to deliver the new row. + let inserts_notify = destination + .wait_for_events_count(vec![(EventType::Insert, 1)]) + .await; + database .run_sql(&format!( "insert into {} (data, partition_key) values ('event3', 250)", @@ -171,10 +176,6 @@ async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { .await .unwrap(); - // Wait for CDC to deliver the new row. - let inserts_notify = destination - .wait_for_events_count(vec![(EventType::Insert, 1)]) - .await; inserts_notify.notified().await; let _ = pipeline.shutdown_and_wait().await; @@ -305,6 +306,160 @@ async fn partition_drop_does_not_emit_delete_or_truncate() { ); } +/// Tests that issuing a TRUNCATE at the parent table level does emit a TRUNCATE event in the +/// replication stream. 
+#[tokio::test(flavor = "multi_thread")] +async fn parent_table_truncate_does_emit_truncate_event() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("partitioned_events_truncate"); + let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; + + let (parent_table_id, _partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + let publication_name = "test_partitioned_pub_truncate".to_string(); + database + .create_publication(&publication_name, std::slice::from_ref(&table_name)) + .await + .expect("Failed to create publication"); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name, + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + + parent_sync_done.notified().await; + + // Wait for the parent table truncate to be replicated. + let truncate_notify = destination + .wait_for_events_count(vec![(EventType::Truncate, 1)]) + .await; + + // We truncate the parent table. + database + .run_sql(&format!( + "truncate table {}", + table_name.as_quoted_identifier(), + )) + .await + .unwrap(); + + truncate_notify.notified().await; + + let _ = pipeline.shutdown_and_wait().await; + + let events = destination.get_events().await; + let grouped_events = group_events_by_type_and_table_id(&events); + let truncate_events = grouped_events + .get(&(EventType::Truncate, parent_table_id)) + .map(|v| v.len()) + .unwrap_or(0); + + assert_eq!( + truncate_events, 1, + "Truncate event should be emitted for the parent table" + ); +} + +/// Tests that issuing a TRUNCATE at the child table level does NOT emit a TRUNCATE event in the +/// replication stream. 
+#[tokio::test(flavor = "multi_thread")] +async fn child_table_truncate_does_not_emit_truncate_event() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("partitioned_events_truncate"); + let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; + + let (parent_table_id, _partition_table_ids) = + create_partitioned_table(&database, table_name.clone(), &partition_specs) + .await + .expect("Failed to create partitioned table"); + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values \ + ('event1', 50), ('event2', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + let publication_name = "test_partitioned_pub_truncate".to_string(); + database + .create_publication(&publication_name, std::slice::from_ref(&table_name)) + .await + .expect("Failed to create publication"); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name, + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + + parent_sync_done.notified().await; + + // We truncate the child table. + let child_p1_name = format!("{}_{}", table_name.name, "p1"); + let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + database + .run_sql(&format!("truncate table {child_p1_qualified}",)) + .await + .unwrap(); + + let _ = pipeline.shutdown_and_wait().await; + + let events = destination.get_events().await; + let grouped_events = group_events_by_type_and_table_id(&events); + let truncate_events = grouped_events + .get(&(EventType::Truncate, parent_table_id)) + .map(|v| v.len()) + .unwrap_or(0); + + assert_eq!( + truncate_events, 0, + "Truncate event should be not emitted for the child table" + ); +} + /// Tests that detached partitions are not replicated with explicit publications. /// Once detached, the partition becomes independent and is not in the publication since /// only the parent table was explicitly added. Inserts to detached partitions are not replicated. @@ -391,6 +546,11 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ .await .unwrap(); + // Wait for the parent table insert to be replicated. + let inserts_notify = destination + .wait_for_events_count(vec![(EventType::Insert, 1)]) + .await; + // Insert into the parent table (should be replicated to remaining partition p2). database .run_sql(&format!( @@ -400,10 +560,6 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ .await .unwrap(); - // Wait for the parent table insert to be replicated. - let inserts_notify = destination - .wait_for_events_count(vec![(EventType::Insert, 1)]) - .await; inserts_notify.notified().await; let _ = pipeline.shutdown_and_wait().await; @@ -831,6 +987,11 @@ async fn partition_detach_with_schema_publication_does_not_replicate_detached_in .await .unwrap(); + // Wait for the parent table insert to be replicated. + let inserts_notify = destination + .wait_for_events_count(vec![(EventType::Insert, 1)]) + .await; + // Insert into parent table (should be replicated). 
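A note on why the parent-table insert below is expected to replicate: the row is physically routed into the remaining partition p2, but because the publication was created with publish_via_partition_root = true, the decoded Insert arrives tagged with the parent's OID. In SQL terms (table name from this test, schema qualification omitted):

```sql
-- Stored in partition p2, yet published under the parent relation.
insert into partitioned_events_schema_detach (data, partition_key)
values ('parent_event', 125);
```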
database .run_sql(&format!( @@ -840,10 +1001,6 @@ async fn partition_detach_with_schema_publication_does_not_replicate_detached_in .await .unwrap(); - // Wait for the parent table insert to be replicated. - let inserts_notify = destination - .wait_for_events_count(vec![(EventType::Insert, 1)]) - .await; inserts_notify.notified().await; let _ = pipeline.shutdown_and_wait().await; From a51a8d108ed54e1d9a4e63b95b1a1055cde97a71 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 16:31:15 +0200 Subject: [PATCH 16/26] Improve --- etl-postgres/src/tokio/test_utils.rs | 25 ++++-- etl/tests/pipeline_with_partitioned_table.rs | 91 ++++++++++++++++++++ 2 files changed, 110 insertions(+), 6 deletions(-) diff --git a/etl-postgres/src/tokio/test_utils.rs b/etl-postgres/src/tokio/test_utils.rs index d452c3564..5c6b76123 100644 --- a/etl-postgres/src/tokio/test_utils.rs +++ b/etl-postgres/src/tokio/test_utils.rs @@ -46,14 +46,16 @@ impl PgDatabase { self.server_version } - /// Creates a Postgres publication for the specified tables. + /// Creates a Postgres publication for the specified tables with an optional configuration + /// parameter. /// - /// Sets up logical replication by creating a publication that includes - /// the given tables for change data capture. - pub async fn create_publication( + /// This method is used for specific cases which should mutate the defaults when creating a + /// publication which is done only for a small subset of tests. + pub async fn create_publication_with_config( &self, publication_name: &str, table_names: &[TableName], + publish_via_partition_root: bool, ) -> Result<(), tokio_postgres::Error> { let table_names = table_names .iter() @@ -61,9 +63,10 @@ impl PgDatabase { .collect::>(); let create_publication_query = format!( - "create publication {} for table {} with (publish_via_partition_root = true)", + "create publication {} for table {} with (publish_via_partition_root = {})", publication_name, - table_names.join(", ") + table_names.join(", "), + publish_via_partition_root ); self.client .as_ref() @@ -74,6 +77,16 @@ impl PgDatabase { Ok(()) } + /// Creates a Postgres publication for the specified tables. + pub async fn create_publication( + &self, + publication_name: &str, + table_names: &[TableName], + ) -> Result<(), tokio_postgres::Error> { + self.create_publication_with_config(publication_name, table_names, true) + .await + } + pub async fn create_publication_for_all( &self, publication_name: &str, diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 5974b489c..7d8d453ba 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -1182,3 +1182,94 @@ async fn partition_detach_with_schema_publication_does_replicate_detached_insert "Detached partition should have 2 rows synced after pipeline restart (1 from initial data + 1 inserted)" ); } + +/// Tests that the system gracefully stops in case `publish_via_partition_root` is set to `false` +/// which is currently not supported. 
+#[tokio::test(flavor = "multi_thread")]
+async fn partitioned_table_with_publish_via_root_false() {
+    init_test_tracing();
+    let database = spawn_source_database().await;
+
+    let table_name = test_table_name("partitioned_events");
+    let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")];
+
+    let (parent_table_id, _partition_table_ids) =
+        create_partitioned_table(&database, table_name.clone(), &partition_specs)
+            .await
+            .expect("Failed to create partitioned table");
+
+    database
+        .run_sql(&format!(
+            "insert into {} (data, partition_key) values
+            ('event1', 50), ('event2', 150)",
+            table_name.as_quoted_identifier()
+        ))
+        .await
+        .unwrap();
+
+    let publication_name = "test_partitioned_pub".to_string();
+    database
+        .create_publication_with_config(&publication_name, std::slice::from_ref(&table_name), false)
+        .await
+        .expect("Failed to create publication");
+
+    let state_store = NotifyingStore::new();
+    let destination = TestDestinationWrapper::wrap(MemoryDestination::new());
+
+    let pipeline_id: PipelineId = random();
+    let mut pipeline = create_pipeline(
+        &database.config,
+        pipeline_id,
+        publication_name,
+        state_store.clone(),
+        destination.clone(),
+    );
+
+    // Register interest in the parent table reaching sync done.
+    let parent_sync_done = state_store
+        .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone)
+        .await;
+
+    pipeline.start().await.unwrap();
+
+    // Wait on the sync done of the parent.
+    parent_sync_done.notified().await;
+
+    // Wait for the COMMIT event of the insert in the parent table. COMMIT events are always
+    // processed unconditionally because they don't contain relation-specific information.
+    //
+    // We use the COMMIT event to verify transaction processing: we can check whether the
+    // transaction's component events were captured. In this case, they should NOT be present
+    // because when `publish_via_partition_root` is `false`, events are tagged with child
+    // table OIDs. Since these child table OIDs are unknown to us (we always resolve against
+    // the parent table OID), those events are skipped.
+    let commit = destination
+        .wait_for_events_count(vec![(EventType::Commit, 1)])
+        .await;
+
+    database
+        .run_sql(&format!(
+            "insert into {} (data, partition_key) values \
+            ('event1', 50)",
+            table_name.as_quoted_identifier()
+        ))
+        .await
+        .unwrap();
+
+    commit.notified().await;
+
+    pipeline.shutdown_and_wait().await.unwrap();
+
+    // No inserts should be captured for the reasons explained above.
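The child-OID tagging described in the comment block above follows from how the publication expands when the option is off: with publish_via_partition_root = false, pg_publication_tables lists the leaf partitions instead of the root, so decoded changes reference partition OIDs. A sketch with illustrative names:

```sql
create publication pub_leaves for table partitioned_events
    with (publish_via_partition_root = false);

-- Expands to the leaves rather than the root:
select tablename from pg_publication_tables where pubname = 'pub_leaves';
-- => partitioned_events_p1
-- => partitioned_events_p2
```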
+ let events = destination.get_events().await; + let grouped_events = group_events_by_type_and_table_id(&events); + let p1_inserts = grouped_events + .get(&(EventType::Insert, parent_table_id)) + .cloned() + .unwrap_or_default(); + assert_eq!( + p1_inserts.len(), + 0, + "Inserts in partition 'p1' should be skipped because `publish_via_partition_root` is `false`" + ); +} From c87820f2dd514d61a51b0692dc30034ad85e7a8c Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 16:47:27 +0200 Subject: [PATCH 17/26] Improve --- etl/tests/pipeline_with_partitioned_table.rs | 336 ++++++------------- 1 file changed, 108 insertions(+), 228 deletions(-) diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 7d8d453ba..f049a9840 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -30,12 +30,11 @@ async fn partitioned_table_copy_replicates_existing_data() { let (parent_table_id, _partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); database .run_sql(&format!( - "insert into {} (data, partition_key) values - ('event1', 50), ('event2', 150), ('event3', 250)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150), ('event3', 250)", table_name.as_quoted_identifier() )) .await @@ -45,7 +44,7 @@ async fn partitioned_table_copy_replicates_existing_data() { database .create_publication(&publication_name, std::slice::from_ref(&table_name)) .await - .expect("Failed to create publication"); + .unwrap(); let state_store = NotifyingStore::new(); let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); @@ -73,32 +72,19 @@ async fn partitioned_table_copy_replicates_existing_data() { let table_rows = destination.get_table_rows().await; let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum(); - assert_eq!( - total_rows, 3, - "Expected 3 rows synced (one per partition), but got {total_rows}" - ); + assert_eq!(total_rows, 3); let table_states = state_store.get_table_replication_states().await; - assert!( - table_states.contains_key(&parent_table_id), - "Parent table should be tracked in state" - ); - assert_eq!( - table_states.len(), - 1, - "Only the parent table should be tracked in state" - ); + assert!(table_states.contains_key(&parent_table_id)); + assert_eq!(table_states.len(), 1); let parent_table_rows = table_rows .iter() .filter(|(table_id, _)| **table_id == parent_table_id) .map(|(_, rows)| rows.len()) .sum::(); - assert_eq!( - parent_table_rows, 3, - "Parent table should contain all rows when publishing via root" - ); + assert_eq!(parent_table_rows, 3); } /// Tests that CDC streams inserts to partitions created after pipeline startup. 
@@ -114,12 +100,11 @@ async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { let (parent_table_id, _initial_partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &initial_partition_specs) .await - .expect("Failed to create initial partitioned table"); + .unwrap(); database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -129,7 +114,7 @@ async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { database .create_publication(&publication_name, std::slice::from_ref(&table_name)) .await - .expect("Failed to create publication"); + .unwrap(); let state_store = NotifyingStore::new(); let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); @@ -182,10 +167,7 @@ async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { let table_rows = destination.get_table_rows().await; let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum(); - assert_eq!( - total_rows, 2, - "Expected 2 rows synced from initial copy, got {total_rows}" - ); + assert_eq!(total_rows, 2); let table_states = state_store.get_table_replication_states().await; assert!(table_states.contains_key(&parent_table_id)); @@ -220,12 +202,11 @@ async fn partition_drop_does_not_emit_delete_or_truncate() { let (parent_table_id, _partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -235,7 +216,7 @@ async fn partition_drop_does_not_emit_delete_or_truncate() { database .create_publication(&publication_name, std::slice::from_ref(&table_name)) .await - .expect("Failed to create publication"); + .unwrap(); let state_store = NotifyingStore::new(); let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); @@ -258,28 +239,28 @@ async fn partition_drop_does_not_emit_delete_or_truncate() { let events_before = destination.get_events().await; let grouped_before = group_events_by_type_and_table_id(&events_before); - let del_before = grouped_before + let delete_count_before = grouped_before .get(&(EventType::Delete, parent_table_id)) .map(|v| v.len()) .unwrap_or(0); - let trunc_before = grouped_before + let truncate_count_before = grouped_before .get(&(EventType::Truncate, parent_table_id)) .map(|v| v.len()) .unwrap_or(0); // Detach and drop one child partition (DDL should not generate DML events). 
- let child_p1_name = format!("{}_{}", table_name.name, "p1"); - let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + let partition_p1_name = format!("{}_{}", table_name.name, "p1"); + let partition_p1_qualified = format!("{}.{}", table_name.schema, partition_p1_name); database .run_sql(&format!( "alter table {} detach partition {}", table_name.as_quoted_identifier(), - child_p1_qualified + partition_p1_qualified )) .await .unwrap(); database - .run_sql(&format!("drop table {child_p1_qualified}")) + .run_sql(&format!("drop table {partition_p1_qualified}")) .await .unwrap(); @@ -287,23 +268,17 @@ async fn partition_drop_does_not_emit_delete_or_truncate() { let events_after = destination.get_events().await; let grouped_after = group_events_by_type_and_table_id(&events_after); - let del_after = grouped_after + let delete_count_after = grouped_after .get(&(EventType::Delete, parent_table_id)) .map(|v| v.len()) .unwrap_or(0); - let trunc_after = grouped_after + let truncate_count_after = grouped_after .get(&(EventType::Truncate, parent_table_id)) .map(|v| v.len()) .unwrap_or(0); - assert_eq!( - del_after, del_before, - "Partition drop must not emit DELETE events" - ); - assert_eq!( - trunc_after, trunc_before, - "Partition drop must not emit TRUNCATE events" - ); + assert_eq!(delete_count_after, delete_count_before); + assert_eq!(truncate_count_after, truncate_count_before); } /// Tests that issuing a TRUNCATE at the parent table level does emit a TRUNCATE event in the @@ -319,12 +294,11 @@ async fn parent_table_truncate_does_emit_truncate_event() { let (parent_table_id, _partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -334,7 +308,7 @@ async fn parent_table_truncate_does_emit_truncate_event() { database .create_publication(&publication_name, std::slice::from_ref(&table_name)) .await - .expect("Failed to create publication"); + .unwrap(); let state_store = NotifyingStore::new(); let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); @@ -376,15 +350,12 @@ async fn parent_table_truncate_does_emit_truncate_event() { let events = destination.get_events().await; let grouped_events = group_events_by_type_and_table_id(&events); - let truncate_events = grouped_events + let truncate_count = grouped_events .get(&(EventType::Truncate, parent_table_id)) .map(|v| v.len()) .unwrap_or(0); - assert_eq!( - truncate_events, 1, - "Truncate event should be emitted for the parent table" - ); + assert_eq!(truncate_count, 1); } /// Tests that issuing a TRUNCATE at the child table level does NOT emit a TRUNCATE event in the @@ -400,12 +371,11 @@ async fn child_table_truncate_does_not_emit_truncate_event() { let (parent_table_id, _partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -415,7 +385,7 @@ async fn child_table_truncate_does_not_emit_truncate_event() { database 
.create_publication(&publication_name, std::slice::from_ref(&table_name)) .await - .expect("Failed to create publication"); + .unwrap(); let state_store = NotifyingStore::new(); let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); @@ -438,10 +408,10 @@ async fn child_table_truncate_does_not_emit_truncate_event() { parent_sync_done.notified().await; // We truncate the child table. - let child_p1_name = format!("{}_{}", table_name.name, "p1"); - let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + let partition_p1_name = format!("{}_{}", table_name.name, "p1"); + let partition_p1_qualified = format!("{}.{}", table_name.schema, partition_p1_name); database - .run_sql(&format!("truncate table {child_p1_qualified}",)) + .run_sql(&format!("truncate table {partition_p1_qualified}")) .await .unwrap(); @@ -449,15 +419,12 @@ async fn child_table_truncate_does_not_emit_truncate_event() { let events = destination.get_events().await; let grouped_events = group_events_by_type_and_table_id(&events); - let truncate_events = grouped_events + let truncate_count = grouped_events .get(&(EventType::Truncate, parent_table_id)) .map(|v| v.len()) .unwrap_or(0); - assert_eq!( - truncate_events, 0, - "Truncate event should be not emitted for the child table" - ); + assert_eq!(truncate_count, 0); } /// Tests that detached partitions are not replicated with explicit publications. @@ -474,15 +441,14 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ let (parent_table_id, partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); let p1_table_id = partition_table_ids[0]; // Insert initial data into both partitions. database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -493,7 +459,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ database .create_publication(&publication_name, std::slice::from_ref(&table_name)) .await - .expect("Failed to create publication"); + .unwrap(); let state_store = NotifyingStore::new(); let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); @@ -521,19 +487,16 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ .get(&parent_table_id) .map(|rows| rows.len()) .unwrap_or(0); - assert_eq!( - parent_rows, 2, - "Parent table should have 2 rows from initial COPY" - ); + assert_eq!(parent_rows, 2); // Detach partition p1 from parent. - let child_p1_name = format!("{}_{}", table_name.name, "p1"); - let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + let partition_p1_name = format!("{}_{}", table_name.name, "p1"); + let partition_p1_qualified = format!("{}.{}", table_name.schema, partition_p1_name); database .run_sql(&format!( "alter table {} detach partition {}", table_name.as_quoted_identifier(), - child_p1_qualified + partition_p1_qualified )) .await .unwrap(); @@ -541,7 +504,7 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ // Insert into the detached partition (should NOT be replicated). 
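    // Once detached, the partition is a standalone table, so the explicit
    // publication created for the parent no longer covers it and the insert
    // below never reaches the replication stream.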
database .run_sql(&format!( - "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)" + "insert into {partition_p1_qualified} (data, partition_key) values ('detached_event', 25)" )) .await .unwrap(); @@ -573,22 +536,14 @@ async fn partition_detach_with_explicit_publication_does_not_replicate_detached_ .get(&(EventType::Insert, parent_table_id)) .cloned() .unwrap_or_default(); - assert_eq!( - parent_inserts.len(), - 1, - "Parent table should have exactly 1 CDC insert event" - ); + assert_eq!(parent_inserts.len(), 1); // Detached partition should have NO insert events. let detached_inserts = grouped .get(&(EventType::Insert, p1_table_id)) .cloned() .unwrap_or_default(); - assert_eq!( - detached_inserts.len(), - 0, - "Detached partition inserts should NOT be replicated" - ); + assert_eq!(detached_inserts.len(), 0); } /// Tests catalog state when a partition is detached with FOR ALL TABLES publication. @@ -605,15 +560,14 @@ async fn partition_detach_with_all_tables_publication_does_not_replicate_detache let (parent_table_id, partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); let p1_table_id = partition_table_ids[0]; // Insert initial data. database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -647,23 +601,17 @@ async fn partition_detach_with_all_tables_publication_does_not_replicate_detache // Verify the initial state. The parent table is the only table tracked. let table_states_before = state_store.get_table_replication_states().await; - assert!( - table_states_before.contains_key(&parent_table_id), - "Parent table should be tracked before detachment" - ); - assert!( - !table_states_before.contains_key(&p1_table_id), - "Child partition p1 should NOT be tracked separately before detachment" - ); + assert!(table_states_before.contains_key(&parent_table_id)); + assert!(!table_states_before.contains_key(&p1_table_id)); // Detach partition p1. - let child_p1_name = format!("{}_{}", table_name.name, "p1"); - let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + let partition_p1_name = format!("{}_{}", table_name.name, "p1"); + let partition_p1_qualified = format!("{}.{}", table_name.schema, partition_p1_name); database .run_sql(&format!( "alter table {} detach partition {}", table_name.as_quoted_identifier(), - child_p1_qualified + partition_p1_qualified )) .await .unwrap(); @@ -680,10 +628,7 @@ async fn partition_detach_with_all_tables_publication_does_not_replicate_detache .await .unwrap(); let inherits_count: i64 = inherits_check[0].get("cnt"); - assert_eq!( - inherits_count, 0, - "Detached partition should have no parent in pg_inherits" - ); + assert_eq!(inherits_count, 0); // Check pg_publication_tables. With FOR ALL TABLES, the detached partition should appear. 
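    // (Detachment turns the partition into an ordinary standalone table, which a
    // FOR ALL TABLES publication picks up immediately; the running pipeline still
    // only knows about the tables it discovered at startup.)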
let pub_tables_check = database @@ -693,20 +638,17 @@ async fn partition_detach_with_all_tables_publication_does_not_replicate_detache .query( "select count(*) as cnt from pg_publication_tables where pubname = $1 and tablename = $2", - &[&publication_name, &child_p1_name], + &[&publication_name, &partition_p1_name], ) .await .unwrap(); let pub_tables_count: i64 = pub_tables_check[0].get("cnt"); - assert_eq!( - pub_tables_count, 1, - "Detached partition should appear in pg_publication_tables for ALL TABLES publication" - ); + assert_eq!(pub_tables_count, 1); // Insert into detached partition. database .run_sql(&format!( - "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)" + "insert into {partition_p1_qualified} (data, partition_key) values ('detached_event', 25)" )) .await .unwrap(); @@ -720,10 +662,7 @@ async fn partition_detach_with_all_tables_publication_does_not_replicate_detache // The pipeline state should still only track the parent table (not the detached partition) // because it hasn't re-scanned for new tables. let table_states_after = state_store.get_table_replication_states().await; - assert!( - table_states_after.contains_key(&parent_table_id), - "Parent table should still be tracked after detachment" - ); + assert!(table_states_after.contains_key(&parent_table_id)); // The detached partition insert should NOT be replicated in this pipeline run // because the pipeline hasn't discovered it as a new table. @@ -733,11 +672,7 @@ async fn partition_detach_with_all_tables_publication_does_not_replicate_detache .get(&(EventType::Insert, p1_table_id)) .cloned() .unwrap_or_default(); - assert_eq!( - detached_inserts.len(), - 0, - "Detached partition inserts should NOT be replicated without table re-discovery" - ); + assert_eq!(detached_inserts.len(), 0); } /// Tests that a detached partition is discovered as a new table after pipeline restart. @@ -754,15 +689,14 @@ async fn partition_detach_with_all_tables_publication_does_replicate_detached_in let (parent_table_id, partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); let p1_table_id = partition_table_ids[0]; // Insert initial data. database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -797,23 +731,17 @@ async fn partition_detach_with_all_tables_publication_does_replicate_detached_in // Verify the initial state. The parent table is the only table tracked. let table_states_before = state_store.get_table_replication_states().await; - assert!( - table_states_before.contains_key(&parent_table_id), - "Parent table should be tracked before detachment" - ); - assert!( - !table_states_before.contains_key(&p1_table_id), - "Child partition p1 should NOT be tracked separately before detachment" - ); + assert!(table_states_before.contains_key(&parent_table_id)); + assert!(!table_states_before.contains_key(&p1_table_id)); // Detach partition p1. 
- let child_p1_name = format!("{}_{}", table_name.name, "p1"); - let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + let partition_p1_name = format!("{}_{}", table_name.name, "p1"); + let partition_p1_qualified = format!("{}.{}", table_name.schema, partition_p1_name); database .run_sql(&format!( "alter table {} detach partition {}", table_name.as_quoted_identifier(), - child_p1_qualified + partition_p1_qualified )) .await .unwrap(); @@ -821,7 +749,7 @@ async fn partition_detach_with_all_tables_publication_does_replicate_detached_in // Insert into detached partition (while pipeline is stopped). database .run_sql(&format!( - "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)" + "insert into {partition_p1_qualified} (data, partition_key) values ('detached_event', 25)" )) .await .unwrap(); @@ -851,29 +779,20 @@ async fn partition_detach_with_all_tables_publication_does_replicate_detached_in // Verify the detached partition was discovered and synced. let table_states_after = state_store.get_table_replication_states().await; - assert!( - table_states_after.contains_key(&p1_table_id), - "Detached partition should be discovered as a standalone table after restart" - ); + assert!(table_states_after.contains_key(&p1_table_id)); // Verify the data from the detached partition was copied. let table_rows = destination.get_table_rows().await; let parent_rows: usize = table_rows - .get(&p1_table_id) + .get(&parent_table_id) .map(|rows| rows.len()) .unwrap_or(0); - assert_eq!( - parent_rows, 2, - "The parent table should have the initial rows" - ); + assert_eq!(parent_rows, 2); let detached_rows: usize = table_rows .get(&p1_table_id) .map(|rows| rows.len()) .unwrap_or(0); - assert_eq!( - detached_rows, 2, - "Detached partition should have rows synced after pipeline restart" - ); + assert_eq!(detached_rows, 2); } /// Tests that detached partitions are not automatically discovered with FOR TABLES IN SCHEMA publication. @@ -899,14 +818,13 @@ async fn partition_detach_with_schema_publication_does_not_replicate_detached_in let (parent_table_id, partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); let p1_table_id = partition_table_ids[0]; database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -940,23 +858,17 @@ async fn partition_detach_with_schema_publication_does_not_replicate_detached_in // Verify initial state. let table_states_before = state_store.get_table_replication_states().await; - assert!( - table_states_before.contains_key(&parent_table_id), - "Parent table should be tracked before detachment" - ); - assert!( - !table_states_before.contains_key(&p1_table_id), - "Child partition p1 should NOT be tracked separately before detachment" - ); + assert!(table_states_before.contains_key(&parent_table_id)); + assert!(!table_states_before.contains_key(&p1_table_id)); // Detach partition p1. 
- let child_p1_name = format!("{}_{}", table_name.name, "p1"); - let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name); + let partition_p1_name = format!("{}_{}", table_name.name, "p1"); + let partition_p1_qualified = format!("{}.{}", table_name.schema, partition_p1_name); database .run_sql(&format!( "alter table {} detach partition {}", table_name.as_quoted_identifier(), - child_p1_qualified + partition_p1_qualified )) .await .unwrap(); @@ -969,20 +881,17 @@ async fn partition_detach_with_schema_publication_does_not_replicate_detached_in .query( "select count(*) as cnt from pg_publication_tables where pubname = $1 and tablename = $2", - &[&publication_name, &child_p1_name], + &[&publication_name, &partition_p1_name], ) .await .unwrap(); let pub_tables_count: i64 = pub_tables_check[0].get("cnt"); - assert_eq!( - pub_tables_count, 1, - "Detached partition should appear in pg_publication_tables for TABLES IN SCHEMA publication" - ); + assert_eq!(pub_tables_count, 1); // Insert into detached partition. database .run_sql(&format!( - "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)" + "insert into {partition_p1_qualified} (data, partition_key) values ('detached_event', 25)" )) .await .unwrap(); @@ -1007,10 +916,7 @@ async fn partition_detach_with_schema_publication_does_not_replicate_detached_in // The pipeline state should still only track the parent table. let table_states_after = state_store.get_table_replication_states().await; - assert!( - table_states_after.contains_key(&parent_table_id), - "Parent table should still be tracked after detachment" - ); + assert!(table_states_after.contains_key(&parent_table_id)); // Verify events. let events = destination.get_events().await; @@ -1021,22 +927,14 @@ async fn partition_detach_with_schema_publication_does_not_replicate_detached_in .get(&(EventType::Insert, parent_table_id)) .cloned() .unwrap_or_default(); - assert_eq!( - parent_inserts.len(), - 1, - "Parent table should have exactly 1 CDC insert event" - ); + assert_eq!(parent_inserts.len(), 1); // Detached partition inserts should NOT be replicated without table re-discovery. let detached_inserts = grouped .get(&(EventType::Insert, p1_table_id)) .cloned() .unwrap_or_default(); - assert_eq!( - detached_inserts.len(), - 0, - "Detached partition inserts should NOT be replicated without table re-discovery" - ); + assert_eq!(detached_inserts.len(), 0); } /// Tests that a detached partition is discovered as a new table after pipeline restart @@ -1062,14 +960,13 @@ async fn partition_detach_with_schema_publication_does_replicate_detached_insert let (parent_table_id, partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); let p1_table_id = partition_table_ids[0]; database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -1104,23 +1001,17 @@ async fn partition_detach_with_schema_publication_does_replicate_detached_insert // Verify initial state. 
     let table_states_before = state_store.get_table_replication_states().await;
-    assert!(
-        table_states_before.contains_key(&parent_table_id),
-        "Parent table should be tracked before detachment"
-    );
-    assert!(
-        !table_states_before.contains_key(&p1_table_id),
-        "Child partition p1 should NOT be tracked separately before detachment"
-    );
+    assert!(table_states_before.contains_key(&parent_table_id));
+    assert!(!table_states_before.contains_key(&p1_table_id));
 
     // Detach partition p1.
-    let child_p1_name = format!("{}_{}", table_name.name, "p1");
-    let child_p1_qualified = format!("{}.{}", table_name.schema, child_p1_name);
+    let partition_p1_name = format!("{}_{}", table_name.name, "p1");
+    let partition_p1_qualified = format!("{}.{}", table_name.schema, partition_p1_name);
     database
         .run_sql(&format!(
             "alter table {} detach partition {}",
             table_name.as_quoted_identifier(),
-            child_p1_qualified
+            partition_p1_qualified
         ))
         .await
         .unwrap();
@@ -1128,7 +1019,7 @@ async fn partition_detach_with_schema_publication_does_replicate_detached_insert
     // Insert into detached partition (while pipeline is still running).
     database
         .run_sql(&format!(
-            "insert into {child_p1_qualified} (data, partition_key) values ('detached_event', 25)"
+            "insert into {partition_p1_qualified} (data, partition_key) values ('detached_event', 25)"
         ))
         .await
         .unwrap();
@@ -1158,10 +1049,7 @@ async fn partition_detach_with_schema_publication_does_replicate_detached_insert
 
     // Verify the detached partition was discovered and synced.
     let table_states_after = state_store.get_table_replication_states().await;
-    assert!(
-        table_states_after.contains_key(&p1_table_id),
-        "Detached partition should be discovered as a standalone table after restart"
-    );
+    assert!(table_states_after.contains_key(&p1_table_id));
 
     // Verify the data from the detached partition was copied.
     let table_rows = destination.get_table_rows().await;
     let parent_rows: usize = table_rows
         .get(&parent_table_id)
         .map(|rows| rows.len())
         .unwrap_or(0);
-    assert_eq!(
-        parent_rows, 2,
-        "Parent table should have the initial 2 rows from first pipeline run"
-    );
+    assert_eq!(parent_rows, 2);
 
     let detached_rows: usize = table_rows
         .get(&p1_table_id)
         .map(|rows| rows.len())
         .unwrap_or(0);
-    assert_eq!(
-        detached_rows, 2,
-        "Detached partition should have 2 rows synced after pipeline restart (1 from initial data + 1 inserted)"
-    );
+    assert_eq!(detached_rows, 2);
 }
 
-/// Tests that the system gracefully stops in case `publish_via_partition_root` is set to `false`
-/// which is currently not supported.
+/// Tests that the system doesn't crash abruptly when `publish_via_partition_root` is set to `false`.
+///
+/// The current behavior is to silently skip replication, but we might want to refine this behavior
+/// and throw an error when we detect that there are partitioned tables in a publication and the setting
+/// is `false`. This way, we would be able to avoid forcing the user to always set `publish_via_partition_root=true`
+/// when it's unnecessary.
#[tokio::test(flavor = "multi_thread")] async fn partitioned_table_with_publish_via_root_false() { init_test_tracing(); @@ -1196,12 +1082,11 @@ async fn partitioned_table_with_publish_via_root_false() { let (parent_table_id, _partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await - .expect("Failed to create partitioned table"); + .unwrap(); database .run_sql(&format!( - "insert into {} (data, partition_key) values - ('event1', 50), ('event2', 150)", + "insert into {} (data, partition_key) values ('event1', 50), ('event2', 150)", table_name.as_quoted_identifier() )) .await @@ -1211,7 +1096,7 @@ async fn partitioned_table_with_publish_via_root_false() { database .create_publication_with_config(&publication_name, std::slice::from_ref(&table_name), false) .await - .expect("Failed to create publication"); + .unwrap(); let state_store = NotifyingStore::new(); let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); @@ -1249,8 +1134,7 @@ async fn partitioned_table_with_publish_via_root_false() { database .run_sql(&format!( - "insert into {} (data, partition_key) values \ - ('event1', 50)", + "insert into {} (data, partition_key) values ('event1', 50)", table_name.as_quoted_identifier() )) .await @@ -1263,13 +1147,9 @@ async fn partitioned_table_with_publish_via_root_false() { // No inserts should be captured for the reasons explained above. let events = destination.get_events().await; let grouped_events = group_events_by_type_and_table_id(&events); - let p1_inserts = grouped_events + let parent_inserts = grouped_events .get(&(EventType::Insert, parent_table_id)) .cloned() .unwrap_or_default(); - assert_eq!( - p1_inserts.len(), - 0, - "Inserts in partition 'p1' should be skipped because `publish_via_partition_root` is `false`" - ); + assert!(parent_inserts.is_empty()); } From 5276bec0087358720616ee6ec545bdd70b0a28a2 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 17:12:17 +0200 Subject: [PATCH 18/26] Improve --- etl/tests/pipeline_with_partitioned_table.rs | 179 +++++++++++++++++++ 1 file changed, 179 insertions(+) diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index f049a9840..7d6a82e3f 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -10,6 +10,7 @@ use etl::test_utils::test_destination_wrapper::TestDestinationWrapper; use etl::test_utils::test_schema::create_partitioned_table; use etl::types::EventType; use etl::types::PipelineId; +use etl::types::TableId; use etl_telemetry::tracing::init_test_tracing; use rand::random; @@ -1065,6 +1066,184 @@ async fn partition_detach_with_schema_publication_does_replicate_detached_insert assert_eq!(detached_rows, 2); } +/// Tests that nested partitions (sub-partitioned tables) work correctly. +/// Creates a two-level partition hierarchy where one partition is itself partitioned, +/// and verifies that both initial COPY and CDC streaming work correctly. +/// Only the top-level parent table should be tracked in the pipeline state. +#[tokio::test(flavor = "multi_thread")] +async fn nested_partitioned_table_copy_and_cdc() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("nested_partitioned_events"); + + // Create the parent partitioned table (Level 1). + // Primary key must include all partitioning columns used at any level. 
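+    // (Postgres requires every unique constraint on a partitioned table, the
+    // primary key included, to contain all partition key columns, and the rule
+    // applies at each level of nesting, hence `sub_partition_key` in the key.)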
+ database + .run_sql(&format!( + "create table {} ( + id bigserial, + data text NOT NULL, + partition_key integer NOT NULL, + sub_partition_key integer NOT NULL, + primary key (id, partition_key, sub_partition_key) + ) partition by range (partition_key)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Get parent table ID. + let parent_row = database + .client + .as_ref() + .unwrap() + .query_one( + "select c.oid from pg_class c join pg_namespace n on n.oid = c.relnamespace + where n.nspname = $1 and c.relname = $2", + &[&table_name.schema, &table_name.name], + ) + .await + .unwrap(); + let parent_table_id: TableId = parent_row.get(0); + + // Create first partition (simple leaf partition) (Level 2a). + let p1_name = format!("{}_{}", table_name.name, "p1"); + let p1_qualified = format!("{}.{}", table_name.schema, p1_name); + database + .run_sql(&format!( + "create table {} partition of {} for values from (1) to (100)", + p1_qualified, + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Create second partition that is itself partitioned (Level 2b). + let p2_name = format!("{}_{}", table_name.name, "p2"); + let p2_qualified = format!("{}.{}", table_name.schema, p2_name); + database + .run_sql(&format!( + "create table {} partition of {} for values from (100) to (200) partition by range (sub_partition_key)", + p2_qualified, + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + // Create sub-partitions of p2 (Level 3). + let p2_sub1_name = format!("{}_{}", p2_name, "sub1"); + let p2_sub1_qualified = format!("{}.{}", table_name.schema, p2_sub1_name); + database + .run_sql(&format!( + "create table {} partition of {} for values from (1) to (50)", + p2_sub1_qualified, + p2_qualified + )) + .await + .unwrap(); + + let p2_sub2_name = format!("{}_{}", p2_name, "sub2"); + let p2_sub2_qualified = format!("{}.{}", table_name.schema, p2_sub2_name); + database + .run_sql(&format!( + "create table {} partition of {} for values from (50) to (100)", + p2_sub2_qualified, + p2_qualified + )) + .await + .unwrap(); + + // Insert initial data into different partitions: + // - event_p1 goes to the simple leaf partition p1 + // - event_p2_sub1 goes to nested partition p2 -> p2_sub1 + // - event_p2_sub2 goes to nested partition p2 -> p2_sub2 + database + .run_sql(&format!( + "insert into {} (data, partition_key, sub_partition_key) values + ('event_p1', 50, 25), + ('event_p2_sub1', 150, 25), + ('event_p2_sub2', 150, 75)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + let publication_name = "test_nested_partitioned_pub".to_string(); + database + .create_publication(&publication_name, std::slice::from_ref(&table_name)) + .await + .unwrap(); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + // Register notification for initial copy completion. + let parent_sync_done = state_store + .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) + .await; + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name, + state_store.clone(), + destination.clone(), + ); + + pipeline.start().await.unwrap(); + + parent_sync_done.notified().await; + + // Verify initial COPY replicated all 3 rows. 
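+    // (One row lands in each leaf: 'event_p1' in p1, 'event_p2_sub1' in
+    // p2_sub1 and 'event_p2_sub2' in p2_sub2, all surfaced under the root
+    // table's OID.)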
+    let table_rows = destination.get_table_rows().await;
+    let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum();
+    assert_eq!(total_rows, 3);
+
+    // Verify only the parent table is tracked (not intermediate or leaf partitions).
+    let table_states = state_store.get_table_replication_states().await;
+    assert!(table_states.contains_key(&parent_table_id));
+    assert_eq!(table_states.len(), 1);
+
+    // Verify all rows are attributed to the parent table.
+    let parent_table_rows = table_rows
+        .iter()
+        .filter(|(table_id, _)| **table_id == parent_table_id)
+        .map(|(_, rows)| rows.len())
+        .sum::<usize>();
+    assert_eq!(parent_table_rows, 3);
+
+    // Insert new rows into different nested partitions.
+    let inserts_notify = destination
+        .wait_for_events_count(vec![(EventType::Insert, 3)])
+        .await;
+
+    database
+        .run_sql(&format!(
+            "insert into {} (data, partition_key, sub_partition_key) values
+            ('new_event_p1', 75, 30),
+            ('new_event_p2_sub1', 125, 40),
+            ('new_event_p2_sub2', 175, 60)",
+            table_name.as_quoted_identifier()
+        ))
+        .await
+        .unwrap();
+
+    inserts_notify.notified().await;
+
+    let _ = pipeline.shutdown_and_wait().await;
+
+    // Verify that events were captured for all nested partitions.
+    let events = destination.get_events().await;
+    let grouped = group_events_by_type_and_table_id(&events);
+    let parent_inserts = grouped
+        .get(&(EventType::Insert, parent_table_id))
+        .cloned()
+        .unwrap_or_default();
+    assert_eq!(parent_inserts.len(), 3);
+}
+
 /// Tests that the system doesn't crash abruptly when `publish_via_partition_root` is set to `false`.
 ///
 /// The current behavior is to silently skip replication, but we might want to refine this behavior

From 0ac142a4c2ec0021623067cc44afe6b3f5152283 Mon Sep 17 00:00:00 2001
From: Riccardo Busetti
Date: Fri, 24 Oct 2025 17:16:14 +0200
Subject: [PATCH 19/26] Improve

---
 etl/tests/pipeline_with_partitioned_table.rs | 80 ++++++++++++++++++--
 1 file changed, 74 insertions(+), 6 deletions(-)

diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs
index 7d6a82e3f..04f12471c 100644
--- a/etl/tests/pipeline_with_partitioned_table.rs
+++ b/etl/tests/pipeline_with_partitioned_table.rs
@@ -13,6 +13,7 @@ use etl::types::PipelineId;
 use etl::types::TableId;
 use etl_telemetry::tracing::init_test_tracing;
 use rand::random;
+use tokio_postgres::types::Type;
 
 /// Tests that initial COPY replicates all rows from a partitioned table.
 /// Only the parent table is tracked, not individual child partitions.
@@ -70,13 +71,43 @@ async fn partitioned_table_copy_replicates_existing_data() {
 
     let _ = pipeline.shutdown_and_wait().await;
 
+    // Verify table schema was discovered correctly.
+    let table_schemas = state_store.get_table_schemas().await;
+    assert!(table_schemas.contains_key(&parent_table_id));
+
+    let parent_schema = &table_schemas[&parent_table_id];
+    assert_eq!(parent_schema.id, parent_table_id);
+    assert_eq!(parent_schema.name, table_name);
+
+    // Verify columns are correctly discovered.
+    assert_eq!(parent_schema.column_schemas.len(), 3);
+
+    // Check id column (added by default).
+    let id_column = &parent_schema.column_schemas[0];
+    assert_eq!(id_column.name, "id");
+    assert_eq!(id_column.typ, Type::INT8);
+    assert!(!id_column.nullable);
+    assert!(id_column.primary);
+
+    // Check data column.
+ let data_column = &parent_schema.column_schemas[1]; + assert_eq!(data_column.name, "data"); + assert_eq!(data_column.typ, Type::TEXT); + assert!(!data_column.nullable); + assert!(!data_column.primary); + + // Check partition_key column. + let partition_key_column = &parent_schema.column_schemas[2]; + assert_eq!(partition_key_column.name, "partition_key"); + assert_eq!(partition_key_column.typ, Type::INT4); + assert!(!partition_key_column.nullable); + assert!(partition_key_column.primary); + let table_rows = destination.get_table_rows().await; let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum(); - assert_eq!(total_rows, 3); let table_states = state_store.get_table_replication_states().await; - assert!(table_states.contains_key(&parent_table_id)); assert_eq!(table_states.len(), 1); @@ -1137,8 +1168,7 @@ async fn nested_partitioned_table_copy_and_cdc() { database .run_sql(&format!( "create table {} partition of {} for values from (1) to (50)", - p2_sub1_qualified, - p2_qualified + p2_sub1_qualified, p2_qualified )) .await .unwrap(); @@ -1148,8 +1178,7 @@ async fn nested_partitioned_table_copy_and_cdc() { database .run_sql(&format!( "create table {} partition of {} for values from (50) to (100)", - p2_sub2_qualified, - p2_qualified + p2_sub2_qualified, p2_qualified )) .await .unwrap(); @@ -1196,6 +1225,45 @@ async fn nested_partitioned_table_copy_and_cdc() { parent_sync_done.notified().await; + // Verify table schema was discovered correctly for nested partitioned table. + let table_schemas = state_store.get_table_schemas().await; + assert!(table_schemas.contains_key(&parent_table_id)); + + let parent_schema = &table_schemas[&parent_table_id]; + assert_eq!(parent_schema.id, parent_table_id); + assert_eq!(parent_schema.name, table_name); + + // Verify columns are correctly discovered (includes sub_partition_key). + assert_eq!(parent_schema.column_schemas.len(), 4); + + // Check id column (added by default). + let id_column = &parent_schema.column_schemas[0]; + assert_eq!(id_column.name, "id"); + assert_eq!(id_column.typ, Type::INT8); + assert!(!id_column.nullable); + assert!(id_column.primary); + + // Check data column. + let data_column = &parent_schema.column_schemas[1]; + assert_eq!(data_column.name, "data"); + assert_eq!(data_column.typ, Type::TEXT); + assert!(!data_column.nullable); + assert!(!data_column.primary); + + // Check partition_key column (part of primary key). + let partition_key_column = &parent_schema.column_schemas[2]; + assert_eq!(partition_key_column.name, "partition_key"); + assert_eq!(partition_key_column.typ, Type::INT4); + assert!(!partition_key_column.nullable); + assert!(partition_key_column.primary); + + // Check sub_partition_key column (part of primary key for nested partitioning). + let sub_partition_key_column = &parent_schema.column_schemas[3]; + assert_eq!(sub_partition_key_column.name, "sub_partition_key"); + assert_eq!(sub_partition_key_column.typ, Type::INT4); + assert!(!sub_partition_key_column.nullable); + assert!(sub_partition_key_column.primary); + // Verify initial COPY replicated all 3 rows. 
let table_rows = destination.get_table_rows().await; let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum(); From 3fec642b433a64c0ddfe30e430384d6db5e58885 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 Oct 2025 17:20:44 +0200 Subject: [PATCH 20/26] Improve --- etl/tests/pipeline_with_partitioned_table.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 04f12471c..90ea4cff4 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -1167,8 +1167,7 @@ async fn nested_partitioned_table_copy_and_cdc() { let p2_sub1_qualified = format!("{}.{}", table_name.schema, p2_sub1_name); database .run_sql(&format!( - "create table {} partition of {} for values from (1) to (50)", - p2_sub1_qualified, p2_qualified + "create table {p2_sub1_qualified} partition of {p2_qualified} for values from (1) to (50)" )) .await .unwrap(); @@ -1177,8 +1176,7 @@ async fn nested_partitioned_table_copy_and_cdc() { let p2_sub2_qualified = format!("{}.{}", table_name.schema, p2_sub2_name); database .run_sql(&format!( - "create table {} partition of {} for values from (50) to (100)", - p2_sub2_qualified, p2_qualified + "create table {p2_sub2_qualified} partition of {p2_qualified} for values from (50) to (100)" )) .await .unwrap(); From bf4ef9ffe2d693621126a0440d18e56045cd4fee Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 Oct 2025 10:12:10 +0100 Subject: [PATCH 21/26] Improve --- etl/src/replication/client.rs | 18 ++-------- etl/tests/pipeline_with_partitioned_table.rs | 37 ++++++++++++++++---- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index 5a03e0d6c..ef4d1e7bd 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -426,27 +426,13 @@ impl PgReplicationClient { let query = format!( r#" with recursive pub_tables as ( - -- Get explicit tables from publication (for regular publications) - select r.prrelid as oid - from pg_publication_rel r - join pg_publication p on p.oid = r.prpubid - where p.pubname = {pub} - - union all - - -- Get tables from pg_publication_tables (for ALL TABLES and FOR TABLES IN SCHEMA) - -- Only executes if pg_publication_rel is empty for this publication + -- Get all tables from publication (pg_publication_tables includes explicit tables, + -- ALL TABLES publications, and FOR TABLES IN SCHEMA publications) select c.oid from pg_publication_tables pt join pg_class c on c.relname = pt.tablename join pg_namespace n on n.oid = c.relnamespace and n.nspname = pt.schemaname where pt.pubname = {pub} - and not exists ( - select 1 - from pg_publication_rel r - join pg_publication p on p.oid = r.prpubid - where p.pubname = {pub} - ) ), hierarchy(relid) as ( -- Start with published tables diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 90ea4cff4..8a8b4161c 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -198,13 +198,6 @@ async fn partitioned_table_copy_and_streams_new_data_from_new_partition() { let _ = pipeline.shutdown_and_wait().await; let table_rows = destination.get_table_rows().await; - let total_rows: usize = table_rows.values().map(|rows| rows.len()).sum(); - assert_eq!(total_rows, 2); - - let table_states = state_store.get_table_replication_states().await; - 
assert!(table_states.contains_key(&parent_table_id)); - assert_eq!(table_states.len(), 1); - let parent_table_rows = table_rows .iter() .filter(|(table_id, _)| **table_id == parent_table_id) @@ -296,6 +289,21 @@ async fn partition_drop_does_not_emit_delete_or_truncate() { .await .unwrap(); + // Insert a row into an existing partition to ensure the pipeline is still processing events. + let inserts_notify = destination + .wait_for_events_count(vec![(EventType::Insert, 1)]) + .await; + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('event3', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + inserts_notify.notified().await; + let _ = pipeline.shutdown_and_wait().await; let events_after = destination.get_events().await; @@ -447,6 +455,21 @@ async fn child_table_truncate_does_not_emit_truncate_event() { .await .unwrap(); + // Insert a row into an existing partition to ensure the pipeline is still processing events. + let inserts_notify = destination + .wait_for_events_count(vec![(EventType::Insert, 1)]) + .await; + + database + .run_sql(&format!( + "insert into {} (data, partition_key) values ('event3', 150)", + table_name.as_quoted_identifier() + )) + .await + .unwrap(); + + inserts_notify.notified().await; + let _ = pipeline.shutdown_and_wait().await; let events = destination.get_events().await; From dbede951064454d86cf8f038930cacbc3f52309b Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 Oct 2025 11:31:04 +0100 Subject: [PATCH 22/26] Improve --- etl/src/pipeline.rs | 36 ++++++++++ etl/src/replication/client.rs | 54 ++++++++++++++ etl/tests/pipeline_with_partitioned_table.rs | 74 +++++++------------- 3 files changed, 117 insertions(+), 47 deletions(-) diff --git a/etl/src/pipeline.rs b/etl/src/pipeline.rs index e021886d4..d5eb7b3c7 100644 --- a/etl/src/pipeline.rs +++ b/etl/src/pipeline.rs @@ -300,6 +300,42 @@ where publication_table_ids.len() ); + // Validate that the publication is configured correctly for partitioned tables. + // + // When `publish_via_partition_root = false`, logical replication messages contain + // child partition OIDs instead of parent table OIDs. Since our schema cache only + // contains parent table IDs (from `get_publication_table_ids`), relation messages + // with child OIDs would cause pipeline failures. + let publish_via_partition_root = replication_client + .get_publish_via_partition_root(&self.config.publication_name) + .await?; + + if !publish_via_partition_root { + let has_partitioned_tables = replication_client + .has_partitioned_tables(&publication_table_ids) + .await?; + + if has_partitioned_tables { + error!( + "publication '{}' has publish_via_partition_root=false but contains partitioned table(s)", + self.config.publication_name + ); + + bail!( + ErrorKind::ConfigError, + "Invalid publication configuration for partitioned tables", + format!( + "The publication '{}' contains partitioned tables but has publish_via_partition_root=false. \ + This configuration causes replication messages to use child partition OIDs, which are not \ + tracked by the pipeline and will cause failures. 
Please recreate the publication with \
+                    publish_via_partition_root=true or use: ALTER PUBLICATION {} SET (publish_via_partition_root = true);",
+                    self.config.publication_name,
+                    self.config.publication_name
+                )
+            );
+        }
+    }
+
     self.store.load_table_replication_states().await?;
 
     let table_replication_states = self.store.get_table_replication_states().await?;
diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs
index ef4d1e7bd..40e617584 100644
--- a/etl/src/replication/client.rs
+++ b/etl/src/replication/client.rs
@@ -387,6 +387,60 @@ impl PgReplicationClient {
         Ok(false)
     }
 
+    /// Retrieves the `publish_via_partition_root` setting for a publication.
+    ///
+    /// Returns `true` if the publication is configured to send replication messages using
+    /// the parent table OID, or `false` if it sends them using child partition OIDs.
+    pub async fn get_publish_via_partition_root(&self, publication: &str) -> EtlResult<bool> {
+        let query = format!(
+            "select pubviaroot from pg_publication where pubname = {};",
+            quote_literal(publication)
+        );
+
+        for msg in self.client.simple_query(&query).await? {
+            if let SimpleQueryMessage::Row(row) = msg {
+                let pubviaroot =
+                    Self::get_row_value::<String>(&row, "pubviaroot", "pg_publication").await?;
+                return Ok(pubviaroot == "t");
+            }
+        }
+
+        bail!(
+            ErrorKind::ConfigError,
+            "Publication not found",
+            format!("Publication '{}' not found in database", publication)
+        );
+    }
+
+    /// Checks if any of the provided table IDs are partitioned tables.
+    ///
+    /// A partitioned table is one where `relkind = 'p'` in `pg_class`.
+    /// Returns `true` if at least one table is partitioned, `false` otherwise.
+    pub async fn has_partitioned_tables(&self, table_ids: &[TableId]) -> EtlResult<bool> {
+        if table_ids.is_empty() {
+            return Ok(false);
+        }
+
+        let table_oids_list = table_ids
+            .iter()
+            .map(|id| id.0.to_string())
+            .collect::<Vec<_>>()
+            .join(", ");
+
+        let query = format!(
+            "select 1 from pg_class where oid in ({}) and relkind = 'p' limit 1;",
+            table_oids_list
+        );
+
+        for msg in self.client.simple_query(&query).await? {
+            if let SimpleQueryMessage::Row(_) = msg {
+                return Ok(true);
+            }
+        }
+
+        Ok(false)
+    }
+
     /// Retrieves the names of all tables included in a publication.
     pub async fn get_publication_table_names(
         &self,
diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs
index 8a8b4161c..8dae9dfc0 100644
--- a/etl/tests/pipeline_with_partitioned_table.rs
+++ b/etl/tests/pipeline_with_partitioned_table.rs
@@ -1333,12 +1333,16 @@ async fn nested_partitioned_table_copy_and_cdc() {
     assert_eq!(parent_inserts.len(), 3);
 }
 
-/// Tests that the system doesn't crash abruptly when `publish_via_partition_root` is set to `false`.
+/// Tests that the pipeline throws an error during startup when `publish_via_partition_root`
+/// is set to `false` and the publication contains partitioned tables.
 ///
-/// The current behavior is to silently skip replication, but we might want to refine this behavior
-/// and throw an error when we detect that there are partitioned tables in a publication and the setting
-/// is `false`. This way, we would be able to avoid forcing the user to always set `publish_via_partition_root=true`
-/// when it's unnecessary.
+/// When `publish_via_partition_root = false`, logical replication messages contain child
+/// partition OIDs instead of parent table OIDs.
Since the pipeline's schema cache only +/// tracks parent table IDs, this configuration would cause pipeline failures when relation +/// messages arrive with unknown child OIDs. +/// +/// The pipeline validates this configuration at startup and rejects it with a clear error +/// message instructing the user to enable `publish_via_partition_root`. #[tokio::test(flavor = "multi_thread")] async fn partitioned_table_with_publish_via_root_false() { init_test_tracing(); @@ -1347,7 +1351,7 @@ async fn partitioned_table_with_publish_via_root_false() { let table_name = test_table_name("partitioned_events"); let partition_specs = [("p1", "from (1) to (100)"), ("p2", "from (100) to (200)")]; - let (parent_table_id, _partition_table_ids) = + let (_parent_table_id, _partition_table_ids) = create_partitioned_table(&database, table_name.clone(), &partition_specs) .await .unwrap(); @@ -1373,51 +1377,27 @@ async fn partitioned_table_with_publish_via_root_false() { let mut pipeline = create_pipeline( &database.config, pipeline_id, - publication_name, + publication_name.clone(), state_store.clone(), destination.clone(), ); - // Wait on the sync done of the parent. - let parent_sync_done = state_store - .notify_on_table_state_type(parent_table_id, TableReplicationPhaseType::SyncDone) - .await; - - pipeline.start().await.unwrap(); - - // Wait on the sync done of the parent. - parent_sync_done.notified().await; - - // Wait for the COMMIT event of the insert in the parent table. COMMIT events are always - // processed unconditionally because they don't contain relation-specific information. - // - // We use the COMMIT event to verify transaction processing: we can check whether the - // transaction's component events were captured. In this case, they should NOT be present - // because when `publication_via_partition_root` is `false`, events are tagged with child - // table OIDs. Since these child table OIDs are unknown to us (we always try to find the parent oid), - // those events are skipped. - let commit = destination - .wait_for_events_count(vec![(EventType::Commit, 1)]) - .await; - - database - .run_sql(&format!( - "insert into {} (data, partition_key) values ('event1', 50)", - table_name.as_quoted_identifier() - )) - .await - .unwrap(); - - commit.notified().await; + // The pipeline should fail to start due to invalid configuration. + let start_result = pipeline.start().await; + assert!(start_result.is_err()); - pipeline.shutdown_and_wait().await.unwrap(); + let err = start_result.unwrap_err(); + let err_message = err.to_string(); - // No inserts should be captured for the reasons explained above. - let events = destination.get_events().await; - let grouped_events = group_events_by_type_and_table_id(&events); - let parent_inserts = grouped_events - .get(&(EventType::Insert, parent_table_id)) - .cloned() - .unwrap_or_default(); - assert!(parent_inserts.is_empty()); + // Verify the error message contains the expected information. 
+ assert!( + err_message.contains("publish_via_partition_root"), + "Error message should mention publish_via_partition_root, got: {}", + err_message + ); + assert!( + err_message.contains(&publication_name), + "Error message should mention the publication name, got: {}", + err_message + ); } From 899ef6938bb4f069890abc75499ac704971af76e Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 Oct 2025 11:32:54 +0100 Subject: [PATCH 23/26] Improve --- etl/tests/pipeline_with_partitioned_table.rs | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index 8dae9dfc0..826ecc41e 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -14,6 +14,7 @@ use etl::types::TableId; use etl_telemetry::tracing::init_test_tracing; use rand::random; use tokio_postgres::types::Type; +use etl::error::ErrorKind; /// Tests that initial COPY replicates all rows from a partitioned table. /// Only the parent table is tracked, not individual child partitions. @@ -1383,21 +1384,6 @@ async fn partitioned_table_with_publish_via_root_false() { ); // The pipeline should fail to start due to invalid configuration. - let start_result = pipeline.start().await; - assert!(start_result.is_err()); - - let err = start_result.unwrap_err(); - let err_message = err.to_string(); - - // Verify the error message contains the expected information. - assert!( - err_message.contains("publish_via_partition_root"), - "Error message should mention publish_via_partition_root, got: {}", - err_message - ); - assert!( - err_message.contains(&publication_name), - "Error message should mention the publication name, got: {}", - err_message - ); + let err = pipeline.start().await.err().unwrap(); + assert_eq!(err.kind(), ErrorKind::ConfigError); } From fb43b9d8ab02fefa630c3fcc92b98949e28d26e5 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 Oct 2025 11:35:19 +0100 Subject: [PATCH 24/26] Improve --- etl/src/pipeline.rs | 3 +-- etl/src/replication/client.rs | 3 +-- etl/tests/pipeline_with_partitioned_table.rs | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/etl/src/pipeline.rs b/etl/src/pipeline.rs index d5eb7b3c7..36b4b1dac 100644 --- a/etl/src/pipeline.rs +++ b/etl/src/pipeline.rs @@ -329,8 +329,7 @@ where This configuration causes replication messages to use child partition OIDs, which are not \ tracked by the pipeline and will cause failures. Please recreate the publication with \ publish_via_partition_root=true or use: ALTER PUBLICATION {} SET (publish_via_partition_root = true);", - self.config.publication_name, - self.config.publication_name + self.config.publication_name, self.config.publication_name ) ); } diff --git a/etl/src/replication/client.rs b/etl/src/replication/client.rs index 40e617584..9797679e8 100644 --- a/etl/src/replication/client.rs +++ b/etl/src/replication/client.rs @@ -428,8 +428,7 @@ impl PgReplicationClient { .join(", "); let query = format!( - "select 1 from pg_class where oid in ({}) and relkind = 'p' limit 1;", - table_oids_list + "select 1 from pg_class where oid in ({table_oids_list}) and relkind = 'p' limit 1;" ); for msg in self.client.simple_query(&query).await? 
{
diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs
index 826ecc41e..794f665ed 100644
--- a/etl/tests/pipeline_with_partitioned_table.rs
+++ b/etl/tests/pipeline_with_partitioned_table.rs
@@ -1,6 +1,7 @@
 #![cfg(feature = "test-utils")]
 
 use etl::destination::memory::MemoryDestination;
+use etl::error::ErrorKind;
 use etl::state::table::TableReplicationPhaseType;
 use etl::test_utils::database::{spawn_source_database, test_table_name};
 use etl::test_utils::event::group_events_by_type_and_table_id;
@@ -14,7 +15,6 @@ use etl::types::TableId;
 use etl_telemetry::tracing::init_test_tracing;
 use rand::random;
 use tokio_postgres::types::Type;
-use etl::error::ErrorKind;
 
From df3044a40cc08c334972a6d8ab23f6e654a94efd Mon Sep 17 00:00:00 2001
From: Riccardo Busetti
Date: Mon, 27 Oct 2025 12:09:41 +0100
Subject: [PATCH 25/26] Improve

---
 etl/tests/pipeline_with_partitioned_table.rs | 47 +++++++++++++++++++-
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs
index 794f665ed..f9bd3a377 100644
--- a/etl/tests/pipeline_with_partitioned_table.rs
+++ b/etl/tests/pipeline_with_partitioned_table.rs
@@ -8,7 +8,9 @@ use etl::test_utils::event::group_events_by_type_and_table_id;
 use etl::test_utils::notify::NotifyingStore;
 use etl::test_utils::pipeline::create_pipeline;
 use etl::test_utils::test_destination_wrapper::TestDestinationWrapper;
-use etl::test_utils::test_schema::create_partitioned_table;
+use etl::test_utils::test_schema::{
+    TableSelection, create_partitioned_table, setup_test_database_schema,
+};
 use etl::types::EventType;
 use etl::types::PipelineId;
 use etl::types::TableId;
@@ -1345,7 +1347,7 @@ async fn nested_partitioned_table_copy_and_cdc() {
 /// The pipeline validates this configuration at startup and rejects it with a clear error
 /// message instructing the user to enable `publish_via_partition_root`.
 #[tokio::test(flavor = "multi_thread")]
-async fn partitioned_table_with_publish_via_root_false() {
+async fn partitioned_table_with_publish_via_partition_root_false_and_partitioned_tables() {
     init_test_tracing();
     let database = spawn_source_database().await;
 
@@ -1387,3 +1389,44 @@ async fn partitioned_table_with_publish_via_root_false() {
     let err = pipeline.start().await.err().unwrap();
     assert_eq!(err.kind(), ErrorKind::ConfigError);
 }
+
+/// Tests that the pipeline doesn't throw an error when `publish_via_partition_root=false` and there
+/// are no partitioned tables in the publication.
+#[tokio::test(flavor = "multi_thread")] +async fn partitioned_table_with_publish_via_partition_root_false_and_no_partitioned_tables() { + init_test_tracing(); + let database = spawn_source_database().await; + + let table_name = test_table_name("non_partitioned_events"); + database + .create_table( + table_name.clone(), + true, + &[("description", "text not null")], + ) + .await + .unwrap(); + + let publication_name = "test_non_partitioned_pub".to_string(); + database + .create_publication_with_config(&publication_name, std::slice::from_ref(&table_name), false) + .await + .unwrap(); + + let state_store = NotifyingStore::new(); + let destination = TestDestinationWrapper::wrap(MemoryDestination::new()); + + let pipeline_id: PipelineId = random(); + let mut pipeline = create_pipeline( + &database.config, + pipeline_id, + publication_name.clone(), + state_store.clone(), + destination.clone(), + ); + + // The pipeline should start and stop successfully. + pipeline.start().await.unwrap(); + let result = pipeline.shutdown_and_wait().await; + assert!(result.is_ok()); +} From 17b0357326769016f9bc0d56f3667edd4872a345 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 Oct 2025 12:17:16 +0100 Subject: [PATCH 26/26] Improve --- etl/tests/pipeline_with_partitioned_table.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/etl/tests/pipeline_with_partitioned_table.rs b/etl/tests/pipeline_with_partitioned_table.rs index f9bd3a377..f096bff1f 100644 --- a/etl/tests/pipeline_with_partitioned_table.rs +++ b/etl/tests/pipeline_with_partitioned_table.rs @@ -8,9 +8,7 @@ use etl::test_utils::event::group_events_by_type_and_table_id; use etl::test_utils::notify::NotifyingStore; use etl::test_utils::pipeline::create_pipeline; use etl::test_utils::test_destination_wrapper::TestDestinationWrapper; -use etl::test_utils::test_schema::{ - TableSelection, create_partitioned_table, setup_test_database_schema, -}; +use etl::test_utils::test_schema::create_partitioned_table; use etl::types::EventType; use etl::types::PipelineId; use etl::types::TableId;
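As a closing note on the validation introduced in patch 22: the same check the pipeline now performs at startup can be run by hand before deploying. A rough psql equivalent for a publication created with an explicit table list (the name `my_publication` is illustrative):

```sql
-- Does the publication route changes through partition roots?
select pubviaroot from pg_publication where pubname = 'my_publication';

-- Are any of the publication's declared tables partitioned (relkind = 'p')?
select r.prrelid::regclass
from pg_publication_rel r
join pg_publication p on p.oid = r.prpubid
join pg_class c on c.oid = r.prrelid
where p.pubname = 'my_publication' and c.relkind = 'p';

-- If the first query returns 'f' and the second returns rows, fix it with:
alter publication my_publication set (publish_via_partition_root = true);
```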