
Commit c4e9edb

fix: workaround for dollar quoted strings
1 parent f3d3aa7 commit c4e9edb

File tree

3 files changed: +55 -13 lines

crates/parser/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -10,4 +10,5 @@ cstree = { version = "0.12.0", features = ["derive"] }
 pg_query = "0.7"
 logos = "0.13.0"
 serde_json = "1.0"
+regex = "1.9.1"
 serde = { version = "1.0", features = ["derive"] }

crates/parser/src/parser.rs

Lines changed: 6 additions & 6 deletions
@@ -15,7 +15,7 @@ pub struct Parser {
     errors: Vec<SyntaxError>,
     stmts: Vec<RawStmt>,
     checkpoint: Option<i32>,
-    is_parsing_erronous_node: bool,
+    is_parsing_flat_node: bool,
 }

 #[derive(Debug)]
@@ -35,7 +35,7 @@ impl Parser {
             errors: Vec::new(),
             stmts: Vec::new(),
             checkpoint: None,
-            is_parsing_erronous_node: false,
+            is_parsing_flat_node: false,
         }
     }

@@ -46,12 +46,12 @@ impl Parser {
         }
     }

-    pub fn set_checkpoint(&mut self, is_parsing_erronous_node: bool) {
+    pub fn set_checkpoint(&mut self, is_parsing_flat_node: bool) {
         assert!(self.checkpoint.is_none());
         assert!(self.token_buffer.is_empty());
         println!("set_checkpoint at {}", self.curr_depth);
         self.checkpoint = Some(self.curr_depth);
-        self.is_parsing_erronous_node = is_parsing_erronous_node;
+        self.is_parsing_flat_node = is_parsing_flat_node;
     }

     pub fn close_checkpoint(&mut self) {
@@ -60,7 +60,7 @@ impl Parser {
             self.close_until_depth(self.checkpoint.unwrap());
         }
         self.checkpoint = None;
-        self.is_parsing_erronous_node = false;
+        self.is_parsing_flat_node = false;
     }

     pub fn start_node(&mut self, kind: SyntaxKind) {
@@ -101,7 +101,7 @@ impl Parser {
     ///
     /// if `is_parsing_erronous_node` is true, applies token immediately
     pub fn token(&mut self, kind: SyntaxKind, text: &str) {
-        if self.is_parsing_erronous_node {
+        if self.is_parsing_flat_node {
             self.inner.token(kind, text);
             return;
         }
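
Note (not part of the commit): a self-contained toy model sketching the behaviour the renamed flag controls, based only on what is visible in the hunks above. `set_checkpoint(true)` marks a flat node, so `token()` writes straight through instead of going into the token buffer; `close_checkpoint()` resets the flag. The `ToyParser` type and its `applied`/`buffered` fields are invented here for illustration and are not the crate's real `Parser`.

#[derive(Default, Debug)]
struct ToyParser {
    applied: Vec<String>,  // stands in for tokens written via self.inner.token(..)
    buffered: Vec<String>, // stands in for the real token_buffer
    is_parsing_flat_node: bool,
}

impl ToyParser {
    fn set_checkpoint(&mut self, is_parsing_flat_node: bool) {
        self.is_parsing_flat_node = is_parsing_flat_node;
    }

    fn token(&mut self, text: &str) {
        if self.is_parsing_flat_node {
            // flat node: apply the token immediately, as in the diff above
            self.applied.push(text.to_string());
        } else {
            self.buffered.push(text.to_string());
        }
    }

    fn close_checkpoint(&mut self) {
        // toy behaviour only: flush anything buffered, then reset the flag
        self.applied.append(&mut self.buffered);
        self.is_parsing_flat_node = false;
    }
}

fn main() {
    let mut p = ToyParser::default();
    p.set_checkpoint(true); // flat node: no buffering
    p.token("select");
    assert_eq!(p.applied.len(), 1);
    p.close_checkpoint();
}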

crates/parser/src/statement.rs

Lines changed: 48 additions & 7 deletions
@@ -5,11 +5,18 @@
 /// being used. all words are put into the "Word" type and will be defined in more detail by the results of pg_query.rs
 use cstree::text::{TextRange, TextSize};
 use logos::Logos;
+use regex::Regex;

 use crate::{
     parser::Parser, pg_query_utils::get_position_for_pg_query_node, syntax_kind::SyntaxKind,
 };

+#[derive(Logos, Debug, PartialEq)]
+pub enum Test {
+    #[regex("'([^']+)'|\\$(\\w)?\\$.*\\$(\\w)?\\$")]
+    Sconst,
+}
+
 #[derive(Logos, Debug, PartialEq)]
 pub enum StatementToken {
     // copied from protobuf::Token. can be generated later
@@ -52,7 +59,8 @@ pub enum StatementToken {
     #[token("^")]
     Ascii94,
     // comments, whitespaces and keywords
-    #[regex("'([^']+)'")]
+    // FIXME: nested and named dollar quoted strings do not work yet
+    #[regex("'([^']+)'|\\$(\\w)?\\$.*\\$(\\w)?\\$")]
     Sconst,
     #[regex("(\\w+)"gm)]
     Word,
@@ -143,20 +151,36 @@ impl Parser {
         // parse root node if no syntax errors
         if pg_query_nodes.peek().is_some() {
             let (node, depth, _) = pg_query_nodes.next().unwrap();
-            // TODO: if root node is a create or alter function stmt, parse the function body
-            // separately
             self.stmt(node.to_enum(), range);
             self.start_node_at(SyntaxKind::from_pg_query_node(&node), Some(depth));
-            self.set_checkpoint(false);
+            // if there is only one node, there are no children, and we do not need to buffer the
+            // tokens. this happens for example with create or alter function statements.
+            self.set_checkpoint(pg_query_nodes.peek().is_none());
         } else {
             // fallback to generic node as root
             self.start_node_at(SyntaxKind::Stmt, None);
             self.set_checkpoint(true);
         }

+        // FIXME: the lexer, for some reason, does not parse dollar quoted strings,
+        // so we check if the error is one
         while let Some(token) = lexer.next() {
-            match token {
-                Ok(token) => {
+            let t: Option<StatementToken> = match token {
+                Ok(token) => Some(token),
+                Err(_) => {
+                    if Regex::new("'([^']+)'|\\$(\\w)?\\$.*\\$(\\w)?\\$")
+                        .unwrap()
+                        .is_match_at(lexer.slice(), 0)
+                    {
+                        Some(StatementToken::Sconst)
+                    } else {
+                        None
+                    }
+                }
+            };
+
+            match t {
+                Some(token) => {
                     let span = lexer.span();

                     // consume pg_query nodes until there is none, or the node is outside of the current text span
@@ -179,6 +203,8 @@ impl Parser {
                     ) || span
                         .contains(&usize::try_from(next_pg_query_token.unwrap().end).unwrap()))
                     {
+                        // TODO: if within function declaration and current token is Sconst, it's
+                        // the function body. it should be passed into parse_source_file.
                         self.token(
                             SyntaxKind::from_pg_query_token(&pg_query_tokens.next().unwrap()),
                             lexer.slice(),
@@ -188,7 +214,7 @@ impl Parser {
                         self.token(token.syntax_kind(), lexer.slice());
                     }
                 }
-                Err(_) => panic!("Unknown SourceFileToken: {:?}", lexer.span()),
+                None => panic!("Unknown StatementToken: {:?}", lexer.slice()),
             }
         }

@@ -272,4 +298,19 @@ mod tests {

         assert_eq!(parsed.cst.text(), input);
     }
+
+    #[test]
+    fn test_create_sql_function() {
+        let input = "CREATE FUNCTION dup(in int, out f1 int, out f2 text)
+            AS $$ SELECT $1, CAST($1 AS text) || ' is text' $$
+            LANGUAGE SQL;";
+
+        let mut parser = Parser::new();
+        parser.parse_statement(input, None);
+        let parsed = parser.finish();
+
+        dbg!(&parsed.cst);
+
+        assert_eq!(parsed.cst.text(), input);
+    }
 }
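
Note (not part of the commit): a minimal standalone sketch of what the workaround pattern '([^']+)'|\$(\w)?\$.*\$(\w)?\$ does and does not accept, assuming only the regex crate added in Cargo.toml above. It shows why the FIXME about named dollar quoted strings is still open: the optional (\w)? covers at most a single tag character.

use regex::Regex;

fn main() {
    // same pattern as the Sconst workaround above
    let sconst = Regex::new("'([^']+)'|\\$(\\w)?\\$.*\\$(\\w)?\\$").unwrap();

    // anonymous dollar quoting is matched
    assert!(sconst.is_match("$$ SELECT 1, CAST(1 AS text) $$"));
    // a single-character tag is matched thanks to the optional (\w)?
    assert!(sconst.is_match("$x$ SELECT 1 $x$"));
    // multi-character (named) tags are not matched yet, hence the FIXME
    assert!(!sconst.is_match("$body$ SELECT 1 $body$"));
    // ordinary string constants still hit the first alternative
    assert!(sconst.is_match("' is text'"));
}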
