From 926b4fdf5bcb3652b2c6ffb0a25b34fe240ac90c Mon Sep 17 00:00:00 2001 From: psteinroe Date: Fri, 26 Sep 2025 09:47:56 +0200 Subject: [PATCH 1/4] fix: begin atomic split --- crates/pgt_statement_splitter/src/lib.rs | 24 +++++++++++++++++++ .../src/splitter/common.rs | 2 ++ 2 files changed, 26 insertions(+) diff --git a/crates/pgt_statement_splitter/src/lib.rs b/crates/pgt_statement_splitter/src/lib.rs index 6fb81c092..4f3503a91 100644 --- a/crates/pgt_statement_splitter/src/lib.rs +++ b/crates/pgt_statement_splitter/src/lib.rs @@ -133,6 +133,30 @@ mod tests { } } + #[test] + fn begin_atomic() { + Tester::from( + "CREATE OR REPLACE FUNCTION public.test_fn(some_in TEXT) +RETURNS TEXT +LANGUAGE sql +IMMUTABLE +STRICT +BEGIN ATOMIC + SELECT $1 || 'foo'; +END;", + ) + .expect_statements(vec![ + "CREATE OR REPLACE FUNCTION public.test_fn(some_in TEXT) +RETURNS TEXT +LANGUAGE sql +IMMUTABLE +STRICT +BEGIN ATOMIC + SELECT $1 || 'foo'; +END;", + ]); + } + #[test] fn ts_with_timezone() { Tester::from("alter table foo add column bar timestamp with time zone;").expect_statements( diff --git a/crates/pgt_statement_splitter/src/splitter/common.rs b/crates/pgt_statement_splitter/src/splitter/common.rs index 54db04e8b..d92c07abe 100644 --- a/crates/pgt_statement_splitter/src/splitter/common.rs +++ b/crates/pgt_statement_splitter/src/splitter/common.rs @@ -188,6 +188,8 @@ pub(crate) fn unknown(p: &mut Splitter, exclude: &[SyntaxKind]) { // for revoke SyntaxKind::REVOKE_KW, SyntaxKind::COMMA, + // for BEGIN ATOMIC + SyntaxKind::ATOMIC_KW, ] .iter() .all(|x| Some(x) != prev.as_ref()) From 9022ea7a5e598ba78503adfe410d3665d80e6c5c Mon Sep 17 00:00:00 2001 From: psteinroe Date: Fri, 26 Sep 2025 10:53:42 +0200 Subject: [PATCH 2/4] progress --- crates/pgt_statement_splitter/src/lib.rs | 2 +- .../src/splitter/common.rs | 20 +++++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/crates/pgt_statement_splitter/src/lib.rs b/crates/pgt_statement_splitter/src/lib.rs 
index 4f3503a91..f6e9ea28c 100644 --- a/crates/pgt_statement_splitter/src/lib.rs +++ b/crates/pgt_statement_splitter/src/lib.rs @@ -92,7 +92,7 @@ mod tests { assert_eq!( self.result.ranges.len(), expected.len(), - "Expected {} statements for input {}, got {}: {:?}", + "Expected {} statements for input\n{}\ngot {}:\n{:?}", expected.len(), self.input, self.result.ranges.len(), diff --git a/crates/pgt_statement_splitter/src/splitter/common.rs b/crates/pgt_statement_splitter/src/splitter/common.rs index d92c07abe..a5dec8d33 100644 --- a/crates/pgt_statement_splitter/src/splitter/common.rs +++ b/crates/pgt_statement_splitter/src/splitter/common.rs @@ -120,11 +120,17 @@ pub(crate) fn case(p: &mut Splitter) { } pub(crate) fn unknown(p: &mut Splitter, exclude: &[SyntaxKind]) { + let mut in_atomic = false; loop { match p.current() { SyntaxKind::SEMICOLON => { + if in_atomic { + // only end the statement if the next non-trivia token is not END + // this is to handle cases like BEGIN ATOMIC SELECT ...; END; + p.advance(); + break; + } p.advance(); - break; } SyntaxKind::EOF => { break; @@ -257,7 +263,6 @@ pub(crate) fn unknown(p: &mut Splitter, exclude: &[SyntaxKind]) { } p.advance(); } - Some(SyntaxKind::CREATE_KW) => { let prev = p.look_back(true); if [ @@ -275,6 +280,17 @@ pub(crate) fn unknown(p: &mut Splitter, exclude: &[SyntaxKind]) { p.advance(); } + Some(SyntaxKind::ATOMIC_KW) => { + if p.look_back(true) == Some(SyntaxKind::BEGIN_KW) { + // BEGIN ATOMIC ... 
END; + in_atomic = true; + } + p.advance(); + } + Some(SyntaxKind::END_KW) => { + in_atomic = false; + p.advance(); + } Some(_) => { break; } From 8108864897249d198f2416b21c0cd6416bbf4938 Mon Sep 17 00:00:00 2001 From: psteinroe Date: Fri, 26 Sep 2025 11:11:03 +0200 Subject: [PATCH 3/4] progress --- .../pgt_statement_splitter/src/splitter/common.rs | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/crates/pgt_statement_splitter/src/splitter/common.rs b/crates/pgt_statement_splitter/src/splitter/common.rs index a5dec8d33..6d6ca32ca 100644 --- a/crates/pgt_statement_splitter/src/splitter/common.rs +++ b/crates/pgt_statement_splitter/src/splitter/common.rs @@ -120,11 +120,10 @@ pub(crate) fn case(p: &mut Splitter) { } pub(crate) fn unknown(p: &mut Splitter, exclude: &[SyntaxKind]) { - let mut in_atomic = false; loop { match p.current() { SyntaxKind::SEMICOLON => { - if in_atomic { + if p.look_ahead(true) != SyntaxKind::END_KW { // only end the statement if the next non-trivia token is not END // this is to handle cases like BEGIN ATOMIC SELECT ...; END; p.advance(); @@ -280,17 +279,6 @@ pub(crate) fn unknown(p: &mut Splitter, exclude: &[SyntaxKind]) { p.advance(); } - Some(SyntaxKind::ATOMIC_KW) => { - if p.look_back(true) == Some(SyntaxKind::BEGIN_KW) { - // BEGIN ATOMIC ... 
END; - in_atomic = true; - } - p.advance(); - } - Some(SyntaxKind::END_KW) => { - in_atomic = false; - p.advance(); - } Some(_) => { break; } From 13ec44facc8e9ce861b8c6378ad27ee1b257f028 Mon Sep 17 00:00:00 2001 From: psteinroe Date: Fri, 26 Sep 2025 11:22:27 +0200 Subject: [PATCH 4/4] progress --- crates/pgt_statement_splitter/src/lib.rs | 10 +++++ .../src/splitter/common.rs | 42 ++++++++++++++++--- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/crates/pgt_statement_splitter/src/lib.rs b/crates/pgt_statement_splitter/src/lib.rs index f6e9ea28c..c67bc0e97 100644 --- a/crates/pgt_statement_splitter/src/lib.rs +++ b/crates/pgt_statement_splitter/src/lib.rs @@ -133,6 +133,16 @@ mod tests { } } + #[test] + fn begin_commit() { + Tester::from( + "BEGIN; +SELECT 1; +COMMIT;", + ) + .expect_statements(vec!["BEGIN;", "SELECT 1;", "COMMIT;"]); + } + #[test] fn begin_atomic() { Tester::from( diff --git a/crates/pgt_statement_splitter/src/splitter/common.rs b/crates/pgt_statement_splitter/src/splitter/common.rs index 6d6ca32ca..fcb851dac 100644 --- a/crates/pgt_statement_splitter/src/splitter/common.rs +++ b/crates/pgt_statement_splitter/src/splitter/common.rs @@ -58,6 +58,33 @@ pub(crate) fn statement(p: &mut Splitter) { p.close_stmt(); } +pub(crate) fn begin_end(p: &mut Splitter) { + p.expect(SyntaxKind::BEGIN_KW); + + let mut depth = 1; + + loop { + match p.current() { + SyntaxKind::BEGIN_KW => { + p.advance(); + depth += 1; + } + SyntaxKind::END_KW | SyntaxKind::EOF => { + if p.current() == SyntaxKind::END_KW { + p.advance(); + } + depth -= 1; + if depth == 0 { + break; + } + } + _ => { + p.advance(); + } + } + } +} + pub(crate) fn parenthesis(p: &mut Splitter) { p.expect(SyntaxKind::L_PAREN); @@ -123,13 +150,8 @@ pub(crate) fn unknown(p: &mut Splitter, exclude: &[SyntaxKind]) { loop { match p.current() { SyntaxKind::SEMICOLON => { - if p.look_ahead(true) != SyntaxKind::END_KW { - // only end the statement if the next non-trivia token is not END - // this 
is to handle cases like BEGIN ATOMIC SELECT ...; END; - p.advance(); - break; - } p.advance(); + break; } SyntaxKind::EOF => { break; @@ -168,6 +190,14 @@ pub(crate) fn unknown(p: &mut Splitter, exclude: &[SyntaxKind]) { SyntaxKind::L_PAREN => { parenthesis(p); } + SyntaxKind::BEGIN_KW => { + if p.look_ahead(true) != SyntaxKind::SEMICOLON { + // bare `BEGIN;` is its own statement; otherwise consume the block up to its END + begin_end(p); + } else { + p.advance(); + } + } t => match at_statement_start(t, exclude) { Some(SyntaxKind::SELECT_KW) => { let prev = p.look_back(true);