 /// being used. all words are put into the "Word" type and will be defined in more detail by the results of pg_query.rs
 use cstree::text::{TextRange, TextSize};
 use logos::Logos;
+use regex::Regex;
 
 use crate::{
     parser::Parser, pg_query_utils::get_position_for_pg_query_node, syntax_kind::SyntaxKind,
 };
 
+#[derive(Logos, Debug, PartialEq)]
+pub enum Test {
+    #[regex("'([^']+)'|\\$(\\w)?\\$.*\\$(\\w)?\\$")]
+    Sconst,
+}
+
 #[derive(Logos, Debug, PartialEq)]
 pub enum StatementToken {
     // copied from protobuf::Token. can be generated later
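The scratch Test enum above isolates the new Sconst pattern for experimentation. A quick illustration of what it accepts (a sketch, assuming the fallible Lexer API this diff already uses elsewhere; not part of the commit): anonymous dollar quotes and single-character tags lex as Sconst, while multi-character tags such as $body$ do not, which is what the FIXME in the next hunk refers to.

    let mut lex = Test::lexer("$$ SELECT 1 $$");
    assert_eq!(lex.next(), Some(Ok(Test::Sconst)));

    // a multi-character tag falls outside \$(\w)?\$ and produces an error token
    let mut lex = Test::lexer("$body$ SELECT 1 $body$");
    assert_ne!(lex.next(), Some(Ok(Test::Sconst)));
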
@@ -52,7 +59,8 @@ pub enum StatementToken {
     #[token("^")]
     Ascii94,
     // comments, whitespaces and keywords
-    #[regex("'([^']+)'")]
+    // FIXME: nested and named dollar quoted strings do not work yet
+    #[regex("'([^']+)'|\\$(\\w)?\\$.*\\$(\\w)?\\$")]
     Sconst,
     #[regex("(\\w+)")]
     Word,
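The FIXME exists because neither logos patterns nor the regex crate support backreferences, so a single regex cannot require the closing tag of a dollar-quoted string to match its opening tag, and nested quotes are out of reach entirely. A hand-rolled scanner avoids both limits; below is a minimal sketch (hypothetical, not part of this commit; a logos token callback would be one way to wire it in).

    // Returns the byte length of the dollar-quoted string at the start of
    // `input`, e.g. Some(18) for "$fn$ SELECT 1 $fn$", or None if there is none.
    fn dollar_quoted_len(input: &str) -> Option<usize> {
        // opening delimiter: '$', an optional identifier tag, another '$'
        let rest = input.strip_prefix('$')?;
        let tag_end = rest.find('$')?;
        let tag = &rest[..tag_end];
        if !tag.chars().all(|c| c.is_alphanumeric() || c == '_') {
            return None;
        }
        let delim = format!("${tag}$");
        // the string ends at the next occurrence of the exact same delimiter
        let close = input[delim.len()..].find(&delim)?;
        Some(delim.len() + close + delim.len())
    }
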
@@ -143,20 +151,36 @@ impl Parser {
         // parse root node if no syntax errors
         if pg_query_nodes.peek().is_some() {
             let (node, depth, _) = pg_query_nodes.next().unwrap();
-            // TODO: if root node is a create or alter function stmt, parse the function body
-            // separately
             self.stmt(node.to_enum(), range);
             self.start_node_at(SyntaxKind::from_pg_query_node(&node), Some(depth));
-            self.set_checkpoint(false);
+            // if there is only one node, there are no children, and we do not need to buffer the
+            // tokens. this happens for example with create or alter function statements.
+            self.set_checkpoint(pg_query_nodes.peek().is_none());
         } else {
             // fallback to generic node as root
             self.start_node_at(SyntaxKind::Stmt, None);
             self.set_checkpoint(true);
         }
 
+        // FIXME: the lexer, for some reason, does not parse dollar quoted strings,
+        // so we check whether the error token is one
         while let Some(token) = lexer.next() {
-            match token {
-                Ok(token) => {
+            let t: Option<StatementToken> = match token {
+                Ok(token) => Some(token),
+                Err(_) => {
+                    if Regex::new("'([^']+)'|\\$(\\w)?\\$.*\\$(\\w)?\\$")
+                        .unwrap()
+                        .is_match_at(lexer.slice(), 0)
+                    {
+                        Some(StatementToken::Sconst)
+                    } else {
+                        None
+                    }
+                }
+            };
+
+            match t {
+                Some(token) => {
                     let span = lexer.span();
 
                     // consume pg_query nodes until there is none, or the node is outside of the current text span
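One possible cleanup for the fallback above: Regex::new recompiles the pattern on every lexer error. Hoisting it into a lazily initialized static compiles it exactly once; a sketch assuming the once_cell crate is available as a dependency:

    use once_cell::sync::Lazy;

    static SCONST_FALLBACK: Lazy<Regex> =
        Lazy::new(|| Regex::new("'([^']+)'|\\$(\\w)?\\$.*\\$(\\w)?\\$").unwrap());

    // then, in the Err(_) arm:
    // if SCONST_FALLBACK.is_match_at(lexer.slice(), 0) { ... }
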
@@ -179,6 +203,8 @@ impl Parser {
                     ) || span
                         .contains(&usize::try_from(next_pg_query_token.unwrap().end).unwrap()))
                     {
+                        // TODO: if within a function declaration and the current token is Sconst,
+                        // it's the function body. it should be passed into parse_source_file.
                         self.token(
                             SyntaxKind::from_pg_query_token(&pg_query_tokens.next().unwrap()),
                             lexer.slice(),
@@ -188,7 +214,7 @@ impl Parser {
                         self.token(token.syntax_kind(), lexer.slice());
                     }
                 }
-                Err(_) => panic!("Unknown SourceFileToken: {:?}", lexer.span()),
+                None => panic!("Unknown StatementToken: {:?}", lexer.slice()),
             }
         }
 
@@ -272,4 +298,19 @@ mod tests {
 
         assert_eq!(parsed.cst.text(), input);
     }
+
+    #[test]
+    fn test_create_sql_function() {
+        let input = "CREATE FUNCTION dup(in int, out f1 int, out f2 text)
+    AS $$ SELECT $1, CAST($1 AS text) || ' is text' $$
+    LANGUAGE SQL;";
+
+        let mut parser = Parser::new();
+        parser.parse_statement(input, None);
+        let parsed = parser.finish();
+
+        dbg!(&parsed.cst);
+
+        assert_eq!(parsed.cst.text(), input);
+    }
 }
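The new test exercises only an anonymous $$ ... $$ body. A named tag would hit the FIXME on the Sconst pattern; a hypothetical regression test for that case (the function name and test name are illustrative, and it is expected to keep failing until named tags lex correctly):

    #[test]
    fn test_named_dollar_quoted_function() {
        let input = "CREATE FUNCTION one() RETURNS int
    AS $body$ SELECT 1; $body$
    LANGUAGE SQL;";

        let mut parser = Parser::new();
        parser.parse_statement(input, None);
        let parsed = parser.finish();

        assert_eq!(parsed.cst.text(), input);
    }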