diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f3f15fb..2a7c88da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ - Add cell delimiters and code lens actions to the Positron extension (#366) - ODBC is now turned on for the CLI as well (#344) +- `FROM` can now come before `VISUALIZE`, mirroring the DuckDB style. This means +that `FROM table VISUALIZE x, y` and `VISUALIZE x, y FROM table` are equivalent +queries (#369) - CLI now has built-in documentation through the `docs` command as well as a skill for llms through the `skill` command (#361) diff --git a/src/execute/cte.rs b/src/execute/cte.rs index 6abe56ab..242028ed 100644 --- a/src/execute/cte.rs +++ b/src/execute/cte.rs @@ -177,7 +177,7 @@ pub fn split_with_query(source_tree: &SourceTree) -> Option<(String, String)> { let mut cursor = with_node.walk(); let mut last_cte_end: Option = None; - let mut select_node = None; + let mut tail_node = None; let mut seen_cte = false; for child in with_node.children(&mut cursor) { @@ -186,8 +186,14 @@ pub fn split_with_query(source_tree: &SourceTree) -> Option<(String, String)> { seen_cte = true; last_cte_end = Some(child.end_byte()); } + // WITH's tail may be a SELECT or a bare FROM (DuckDB-style). + // For from_statement, we rewrite the tail to `SELECT * `. "select_statement" if seen_cte => { - select_node = Some(child); + tail_node = Some((child, false)); + break; + } + "from_statement" if seen_cte => { + tail_node = Some((child, true)); break; } _ => {} @@ -195,8 +201,13 @@ pub fn split_with_query(source_tree: &SourceTree) -> Option<(String, String)> { } let cte_prefix = source_tree.source[with_node.start_byte()..last_cte_end?].to_string(); - let trailing_select = source_tree.get_text(&select_node?); - Some((cte_prefix, trailing_select)) + let (node, is_from) = tail_node?; + let trailing = if is_from { + format!("SELECT * {}", source_tree.get_text(&node)) + } else { + source_tree.get_text(&node) + }; + Some((cte_prefix, trailing)) } /// Transform global SQL for execution with temp tables @@ -240,14 +251,16 @@ pub fn transform_global_sql( pub fn has_executable_sql(source_tree: &SourceTree) -> bool { let root = source_tree.root(); - // Check for direct executable statements (SELECT, CREATE, INSERT, UPDATE, DELETE) + // Check for direct executable statements (SELECT, CREATE, INSERT, UPDATE, + // DELETE, or bare FROM (DuckDB-style FROM-first — rewritten to SELECT *)) let direct_statements = r#" (sql_statement [(select_statement) (create_statement) (insert_statement) (update_statement) - (delete_statement)] @stmt) + (delete_statement) + (from_statement)] @stmt) "#; if source_tree.find_node(&root, direct_statements).is_some() { diff --git a/src/parser/source_tree.rs b/src/parser/source_tree.rs index ef4acc57..6850d9ca 100644 --- a/src/parser/source_tree.rs +++ b/src/parser/source_tree.rs @@ -29,11 +29,37 @@ impl<'a> SourceTree<'a> { .parse(source, None) .ok_or_else(|| GgsqlError::ParseError("Failed to parse query".to_string()))?; - Ok(Self { + let source_tree = Self { tree, source, language, - }) + }; + + // Reject ambiguous double-FROM: `FROM a VISUALISE FROM b …` has two + // data sources for one statement. Caught here (rather than at extract + // time) so extract_sql returns a plain Option and every consumer that + // already handles new()'s Result gets the check for free. + source_tree.check_no_double_from()?; + + Ok(source_tree) + } + + fn check_no_double_from(&self) -> Result<()> { + let root = self.root(); + let has_sql_from = self + .find_node(&root, "(sql_statement (from_statement) @stmt)") + .is_some(); + let has_viz_from = self + .find_node(&root, "(visualise_statement (from_clause (table_ref) @t))") + .is_some(); + if has_sql_from && has_viz_from { + return Err(GgsqlError::ParseError( + "VISUALISE has two FROM clauses (one before VISUALISE and one after). \ + Use only one." + .to_string(), + )); + } + Ok(()) } /// Validate that the parse tree has no errors @@ -107,10 +133,23 @@ impl<'a> SourceTree<'a> { .collect() } - /// Extract the SQL portion of the query (before VISUALISE) + /// Extract the SQL portion of the query (before VISUALISE). + /// + /// Two rewrites happen here so the returned SQL is always something a + /// plain SQL reader can execute: /// - /// If VISUALISE FROM is used, this injects "SELECT * FROM " - /// Returns None if there's no SQL portion and no VISUALISE FROM injection needed + /// - DuckDB-style FROM-first: the grammar parses bare `FROM t` as a + /// `from_statement`. Each such statement is rewritten by prepending + /// `SELECT * ` — so `FROM sales VISUALISE …` becomes + /// `SELECT * FROM sales`. + /// - `VISUALISE FROM `: the FROM appears on the VISUALISE clause. + /// We append `SELECT * FROM ` to the SQL so the reader sees an + /// executable query. + /// + /// Returns `None` if there's no SQL portion and no VISUALISE FROM to + /// inject. The ambiguous double-FROM case (`FROM a VISUALISE FROM b …`) + /// is rejected in `SourceTree::new`, so any tree reaching here has at + /// most one of the two FROMs. pub fn extract_sql(&self) -> Option { let root = self.root(); @@ -124,20 +163,45 @@ impl<'a> SourceTree<'a> { } // Find sql_portion node and extract its text - let sql_text = self - .find_node(&root, "(sql_portion) @sql") + let sql_portion_node = self.find_node(&root, "(sql_portion) @sql"); + let mut sql_text = sql_portion_node .map(|node| self.get_text(&node)) .unwrap_or_default(); - // Check if any VISUALISE statement has FROM clause - let from_query = r#" - (visualise_statement - (from_clause - (table_ref) @table)) - "#; + // DuckDB-style FROM-first: the grammar recognizes bare `FROM t` as an + // sql_statement variant. Rewrite each such occurrence by prepending + // `SELECT * `. + if let Some(sql_portion) = sql_portion_node { + let from_stmts = self.find_nodes(&sql_portion, "(from_statement) @from_stmt"); + if !from_stmts.is_empty() { + let portion_start = sql_portion.start_byte(); + let portion_end = sql_portion.end_byte(); + let mut stmts: Vec = from_stmts; + stmts.sort_by_key(|n| n.start_byte()); + let mut out = String::with_capacity(sql_text.len() + stmts.len() * 9); + let mut cursor = portion_start; + for stmt in stmts { + let s = stmt.start_byte(); + out.push_str(&self.source[cursor..s]); + out.push_str("SELECT * "); + cursor = s; + } + out.push_str(&self.source[cursor..portion_end]); + sql_text = out; + } + } + + // VISUALISE FROM : append "SELECT * FROM ". + let viz_from = self.find_text( + &root, + r#" + (visualise_statement + (from_clause + (table_ref) @table)) + "#, + ); - if let Some(from_identifier) = self.find_text(&root, from_query) { - // Inject SELECT * FROM + if let Some(from_identifier) = viz_from { let result = if sql_text.trim().is_empty() { format!("SELECT * FROM {}", from_identifier) } else { @@ -145,7 +209,6 @@ impl<'a> SourceTree<'a> { }; Some(result) } else { - // No injection needed - return SQL if not empty let trimmed = sql_text.trim(); if trimmed.is_empty() { None @@ -288,6 +351,120 @@ mod tests { assert!(!sql.contains("SELECT * FROM SELECT")); // Make sure we didn't double-inject } + // ======================================================================== + // FROM-first (DuckDB-style) tests + // ======================================================================== + + #[test] + fn test_extract_sql_from_first_simple() { + let query = "FROM mtcars VISUALISE DRAW point MAPPING mpg AS x, hp AS y"; + let tree = SourceTree::new(query).unwrap(); + + let sql = tree.extract_sql().unwrap(); + assert!(sql.contains("SELECT * FROM mtcars")); + + let viz = tree.extract_visualise().unwrap(); + assert!(viz.starts_with("VISUALISE")); + } + + #[test] + fn test_extract_sql_from_first_with_where() { + let query = "FROM sales WHERE year = 2024 VISUALISE DRAW point MAPPING x AS x, y AS y"; + let tree = SourceTree::new(query).unwrap(); + + let sql = tree.extract_sql().unwrap(); + assert!(sql.contains("SELECT * FROM sales WHERE year = 2024")); + } + + #[test] + fn test_extract_sql_from_first_with_cte() { + let query = + "WITH cte AS (SELECT * FROM x) FROM cte VISUALISE DRAW point MAPPING a AS x, b AS y"; + let tree = SourceTree::new(query).unwrap(); + + let sql = tree.extract_sql().unwrap(); + assert!(sql.contains("WITH cte AS (SELECT * FROM x)")); + assert!(sql.contains("SELECT * FROM cte")); + } + + #[test] + fn test_extract_sql_from_first_after_create() { + let query = + "CREATE TABLE x AS SELECT 1; FROM x VISUALISE DRAW point MAPPING a AS x, b AS y"; + let tree = SourceTree::new(query).unwrap(); + + let sql = tree.extract_sql().unwrap(); + assert!(sql.contains("CREATE TABLE x AS SELECT 1")); + assert!(sql.contains("SELECT * FROM x")); + } + + #[test] + fn test_extract_sql_from_first_file_path() { + let query = "FROM 'mtcars.csv' VISUALISE DRAW point MAPPING mpg AS x, hp AS y"; + let tree = SourceTree::new(query).unwrap(); + + let sql = tree.extract_sql().unwrap(); + assert!(sql.contains("SELECT * FROM 'mtcars.csv'")); + } + + #[test] + fn test_extract_sql_from_first_case_insensitive() { + let query = "from sales visualise DRAW point MAPPING x AS x, y AS y"; + let tree = SourceTree::new(query).unwrap(); + + let sql = tree.extract_sql().unwrap(); + assert!(sql.contains("SELECT * from sales")); + } + + #[test] + fn test_extract_sql_no_rewrite_when_select_precedes_from() { + // Regression: `SELECT a, b FROM t VISUALISE ...` must NOT trigger + // SELECT * injection — the FROM belongs to the SELECT. + let query = "SELECT a, b FROM t VISUALISE DRAW point MAPPING a AS x, b AS y"; + let tree = SourceTree::new(query).unwrap(); + + let sql = tree.extract_sql().unwrap(); + assert_eq!(sql, "SELECT a, b FROM t"); + } + + #[test] + fn test_double_from_rejected_at_parse() { + // Leading FROM + VISUALISE FROM is ambiguous and must error at parse + // time (before any extract_sql call). + let query = "FROM a VISUALISE FROM b DRAW point MAPPING x AS x, y AS y"; + let err = SourceTree::new(query).unwrap_err(); + let msg = format!("{}", err); + assert!( + msg.contains("two FROM clauses"), + "expected double-FROM rejection, got: {}", + msg + ); + } + + #[test] + fn test_extract_sql_from_first_skips_string_contents() { + // A FROM inside a string literal in a preceding statement should be + // parsed as part of the string, not mistaken for a bare FROM. + let query = + "CREATE TABLE x AS SELECT 'FROM fake' AS col; FROM x VISUALISE DRAW point MAPPING a AS x, b AS y"; + let tree = SourceTree::new(query).unwrap(); + + let sql = tree.extract_sql().unwrap(); + // Injected SELECT * precedes the real FROM, not the string one. + assert!(sql.contains("'FROM fake'")); + assert!(sql.contains("SELECT * FROM x")); + } + + #[test] + fn test_extract_sql_from_first_skips_line_comment() { + let query = "-- FROM fake\nFROM real VISUALISE DRAW point MAPPING a AS x, b AS y"; + let tree = SourceTree::new(query).unwrap(); + + let sql = tree.extract_sql().unwrap(); + assert!(sql.contains("SELECT * FROM real")); + assert!(!sql.contains("SELECT * -- FROM fake")); + } + #[test] fn test_extract_sql_visualise_from_file_path_single_quotes() { let query = "VISUALISE FROM 'mtcars.csv' DRAW point MAPPING mpg AS x, hp AS y"; diff --git a/tree-sitter-ggsql/grammar.js b/tree-sitter-ggsql/grammar.js index 36b2504e..a402e45a 100644 --- a/tree-sitter-ggsql/grammar.js +++ b/tree-sitter-ggsql/grammar.js @@ -48,9 +48,29 @@ module.exports = grammar({ $.insert_statement, $.update_statement, $.delete_statement, + $.from_statement, // DuckDB-style FROM-first: `FROM t` ≡ `SELECT * FROM t` $.other_sql_statement // Fallback for other SQL ), + // Bare FROM as a terminal SQL statement (DuckDB-style). Starts with a + // from_clause and optionally consumes trailing tokens (WHERE, GROUP BY, + // ORDER BY, LIMIT, etc.) up to VISUALISE — mirrors select_body's permissive + // token bag so the same trailing-SQL constructs work after a bare FROM. + from_statement: $ => prec.right(seq( + $.from_clause, + repeat(choice( + $.window_function, + $.cast_expression, + $.function_call, + $.non_from_sql_keyword, + $.string, + $.number, + ',', '*', '.', '=', '<', '>', '!', '+', '-', '/', '%', '|', '&', '^', '~', '::', + $.subquery, + $.identifier + )) + )), + // SELECT statement select_statement: $ => prec(2, seq( caseInsensitive('SELECT'), @@ -70,13 +90,14 @@ module.exports = grammar({ $.identifier ))), - // WITH statement (CTEs) - WITH must be followed by SELECT + // WITH statement (CTEs) - tail is an optional SELECT or bare FROM + // (`WITH cte AS (...) FROM cte` is DuckDB-style FROM-first after WITH). with_statement: $ => prec.right(2, seq( caseInsensitive('WITH'), optional(caseInsensitive('RECURSIVE')), $.cte_definition, repeat(seq(',', $.cte_definition)), - optional($.select_statement) // WITH can optionally be followed by SELECT + optional(choice($.select_statement, $.from_statement)) )), cte_definition: $ => seq( @@ -155,11 +176,11 @@ module.exports = grammar({ )), // Other SQL statements - DO NOT match if starts with keywords we handle - // explicitly (WITH, SELECT, CREATE, INSERT, UPDATE, DELETE, VISUALISE) + // explicitly (WITH, SELECT, CREATE, INSERT, UPDATE, DELETE, VISUALISE, FROM). other_sql_statement: $ => { - const exclude_pattern = /[^\s;(),'"WwSsCcIiUuDdVv]+/; + const exclude_pattern = /[^\s;(),'"WwSsCcIiUuDdVvFf]+/; return prec(-1, repeat1(choice( - $.sql_keyword, + $.non_from_sql_keyword, token(exclude_pattern), // Tokens not starting with excluded letters $.string, $.number, @@ -231,9 +252,16 @@ module.exports = grammar({ ')' )), - // Common SQL keywords (to help parser recognize structure) + // Common SQL keywords (to help parser recognize structure). + // Split into FROM + non_from_sql_keyword so other_sql_statement can use + // just the non-FROM variant for its first token (preventing it from + // eating `FROM t VISUALISE ...` which should parse as from_statement). sql_keyword: $ => choice( caseInsensitive('FROM'), + $.non_from_sql_keyword + ), + + non_from_sql_keyword: $ => choice( caseInsensitive('WHERE'), caseInsensitive('JOIN'), caseInsensitive('LEFT'),