Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

- Add cell delimiters and code lens actions to the Positron extension (#366)
- ODBC is now turned on for the CLI as well (#344)
- `FROM` can now come before `VISUALIZE`, mirroring the DuckDB style. This means
that `FROM table VISUALIZE x, y` and `VISUALIZE x, y FROM table` are equivalent
queries (#369)
- CLI now has built-in documentation through the `docs` command as well as a
skill for llms through the `skill` command (#361)

Expand Down
25 changes: 19 additions & 6 deletions src/execute/cte.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ pub fn split_with_query(source_tree: &SourceTree) -> Option<(String, String)> {

let mut cursor = with_node.walk();
let mut last_cte_end: Option<usize> = None;
let mut select_node = None;
let mut tail_node = None;
let mut seen_cte = false;

for child in with_node.children(&mut cursor) {
Expand All @@ -186,17 +186,28 @@ pub fn split_with_query(source_tree: &SourceTree) -> Option<(String, String)> {
seen_cte = true;
last_cte_end = Some(child.end_byte());
}
// WITH's tail may be a SELECT or a bare FROM (DuckDB-style).
// For from_statement, we rewrite the tail to `SELECT * <from_stmt>`.
"select_statement" if seen_cte => {
select_node = Some(child);
tail_node = Some((child, false));
break;
}
"from_statement" if seen_cte => {
tail_node = Some((child, true));
break;
}
_ => {}
}
}

let cte_prefix = source_tree.source[with_node.start_byte()..last_cte_end?].to_string();
let trailing_select = source_tree.get_text(&select_node?);
Some((cte_prefix, trailing_select))
let (node, is_from) = tail_node?;
let trailing = if is_from {
format!("SELECT * {}", source_tree.get_text(&node))
} else {
source_tree.get_text(&node)
};
Some((cte_prefix, trailing))
}

/// Transform global SQL for execution with temp tables
Expand Down Expand Up @@ -240,14 +251,16 @@ pub fn transform_global_sql(
pub fn has_executable_sql(source_tree: &SourceTree) -> bool {
let root = source_tree.root();

// Check for direct executable statements (SELECT, CREATE, INSERT, UPDATE, DELETE)
// Check for direct executable statements (SELECT, CREATE, INSERT, UPDATE,
// DELETE, or bare FROM (DuckDB-style FROM-first — rewritten to SELECT *))
let direct_statements = r#"
(sql_statement
[(select_statement)
(create_statement)
(insert_statement)
(update_statement)
(delete_statement)] @stmt)
(delete_statement)
(from_statement)] @stmt)
"#;

if source_tree.find_node(&root, direct_statements).is_some() {
Expand Down
209 changes: 193 additions & 16 deletions src/parser/source_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,37 @@ impl<'a> SourceTree<'a> {
.parse(source, None)
.ok_or_else(|| GgsqlError::ParseError("Failed to parse query".to_string()))?;

Ok(Self {
let source_tree = Self {
tree,
source,
language,
})
};

// Reject ambiguous double-FROM: `FROM a VISUALISE FROM b …` has two
// data sources for one statement. Caught here (rather than at extract
// time) so extract_sql returns a plain Option and every consumer that
// already handles new()'s Result gets the check for free.
source_tree.check_no_double_from()?;

Ok(source_tree)
}

fn check_no_double_from(&self) -> Result<()> {
let root = self.root();
let has_sql_from = self
.find_node(&root, "(sql_statement (from_statement) @stmt)")
.is_some();
let has_viz_from = self
.find_node(&root, "(visualise_statement (from_clause (table_ref) @t))")
.is_some();
if has_sql_from && has_viz_from {
return Err(GgsqlError::ParseError(
"VISUALISE has two FROM clauses (one before VISUALISE and one after). \
Use only one."
.to_string(),
));
}
Ok(())
}

/// Validate that the parse tree has no errors
Expand Down Expand Up @@ -107,10 +133,23 @@ impl<'a> SourceTree<'a> {
.collect()
}

/// Extract the SQL portion of the query (before VISUALISE)
/// Extract the SQL portion of the query (before VISUALISE).
///
/// Two rewrites happen here so the returned SQL is always something a
/// plain SQL reader can execute:
///
/// If VISUALISE FROM is used, this injects "SELECT * FROM <source>"
/// Returns None if there's no SQL portion and no VISUALISE FROM injection needed
/// - DuckDB-style FROM-first: the grammar parses bare `FROM t` as a
/// `from_statement`. Each such statement is rewritten by prepending
/// `SELECT * ` — so `FROM sales VISUALISE …` becomes
/// `SELECT * FROM sales`.
/// - `VISUALISE FROM <source>`: the FROM appears on the VISUALISE clause.
/// We append `SELECT * FROM <source>` to the SQL so the reader sees an
/// executable query.
///
/// Returns `None` if there's no SQL portion and no VISUALISE FROM to
/// inject. The ambiguous double-FROM case (`FROM a VISUALISE FROM b …`)
/// is rejected in `SourceTree::new`, so any tree reaching here has at
/// most one of the two FROMs.
pub fn extract_sql(&self) -> Option<String> {
let root = self.root();

Expand All @@ -124,28 +163,52 @@ impl<'a> SourceTree<'a> {
}

// Find sql_portion node and extract its text
let sql_text = self
.find_node(&root, "(sql_portion) @sql")
let sql_portion_node = self.find_node(&root, "(sql_portion) @sql");
let mut sql_text = sql_portion_node
.map(|node| self.get_text(&node))
.unwrap_or_default();

// Check if any VISUALISE statement has FROM clause
let from_query = r#"
(visualise_statement
(from_clause
(table_ref) @table))
"#;
// DuckDB-style FROM-first: the grammar recognizes bare `FROM t` as an
// sql_statement variant. Rewrite each such occurrence by prepending
// `SELECT * `.
if let Some(sql_portion) = sql_portion_node {
let from_stmts = self.find_nodes(&sql_portion, "(from_statement) @from_stmt");
if !from_stmts.is_empty() {
let portion_start = sql_portion.start_byte();
let portion_end = sql_portion.end_byte();
let mut stmts: Vec<Node> = from_stmts;
stmts.sort_by_key(|n| n.start_byte());
let mut out = String::with_capacity(sql_text.len() + stmts.len() * 9);
let mut cursor = portion_start;
for stmt in stmts {
let s = stmt.start_byte();
out.push_str(&self.source[cursor..s]);
out.push_str("SELECT * ");
cursor = s;
}
out.push_str(&self.source[cursor..portion_end]);
sql_text = out;
}
}

// VISUALISE FROM <source>: append "SELECT * FROM <source>".
let viz_from = self.find_text(
&root,
r#"
(visualise_statement
(from_clause
(table_ref) @table))
"#,
);

if let Some(from_identifier) = self.find_text(&root, from_query) {
// Inject SELECT * FROM <source>
if let Some(from_identifier) = viz_from {
let result = if sql_text.trim().is_empty() {
format!("SELECT * FROM {}", from_identifier)
} else {
format!("{} SELECT * FROM {}", sql_text.trim(), from_identifier)
};
Some(result)
} else {
// No injection needed - return SQL if not empty
let trimmed = sql_text.trim();
if trimmed.is_empty() {
None
Expand Down Expand Up @@ -288,6 +351,120 @@ mod tests {
assert!(!sql.contains("SELECT * FROM SELECT")); // Make sure we didn't double-inject
}

// ========================================================================
// FROM-first (DuckDB-style) tests
// ========================================================================

#[test]
fn test_extract_sql_from_first_simple() {
let query = "FROM mtcars VISUALISE DRAW point MAPPING mpg AS x, hp AS y";
let tree = SourceTree::new(query).unwrap();

let sql = tree.extract_sql().unwrap();
assert!(sql.contains("SELECT * FROM mtcars"));

let viz = tree.extract_visualise().unwrap();
assert!(viz.starts_with("VISUALISE"));
}

#[test]
fn test_extract_sql_from_first_with_where() {
let query = "FROM sales WHERE year = 2024 VISUALISE DRAW point MAPPING x AS x, y AS y";
let tree = SourceTree::new(query).unwrap();

let sql = tree.extract_sql().unwrap();
assert!(sql.contains("SELECT * FROM sales WHERE year = 2024"));
}

#[test]
fn test_extract_sql_from_first_with_cte() {
let query =
"WITH cte AS (SELECT * FROM x) FROM cte VISUALISE DRAW point MAPPING a AS x, b AS y";
let tree = SourceTree::new(query).unwrap();

let sql = tree.extract_sql().unwrap();
assert!(sql.contains("WITH cte AS (SELECT * FROM x)"));
assert!(sql.contains("SELECT * FROM cte"));
}

#[test]
fn test_extract_sql_from_first_after_create() {
let query =
"CREATE TABLE x AS SELECT 1; FROM x VISUALISE DRAW point MAPPING a AS x, b AS y";
let tree = SourceTree::new(query).unwrap();

let sql = tree.extract_sql().unwrap();
assert!(sql.contains("CREATE TABLE x AS SELECT 1"));
assert!(sql.contains("SELECT * FROM x"));
}

#[test]
fn test_extract_sql_from_first_file_path() {
let query = "FROM 'mtcars.csv' VISUALISE DRAW point MAPPING mpg AS x, hp AS y";
let tree = SourceTree::new(query).unwrap();

let sql = tree.extract_sql().unwrap();
assert!(sql.contains("SELECT * FROM 'mtcars.csv'"));
}

#[test]
fn test_extract_sql_from_first_case_insensitive() {
let query = "from sales visualise DRAW point MAPPING x AS x, y AS y";
let tree = SourceTree::new(query).unwrap();

let sql = tree.extract_sql().unwrap();
assert!(sql.contains("SELECT * from sales"));
}

#[test]
fn test_extract_sql_no_rewrite_when_select_precedes_from() {
// Regression: `SELECT a, b FROM t VISUALISE ...` must NOT trigger
// SELECT * injection — the FROM belongs to the SELECT.
let query = "SELECT a, b FROM t VISUALISE DRAW point MAPPING a AS x, b AS y";
let tree = SourceTree::new(query).unwrap();

let sql = tree.extract_sql().unwrap();
assert_eq!(sql, "SELECT a, b FROM t");
}

#[test]
fn test_double_from_rejected_at_parse() {
// Leading FROM + VISUALISE FROM is ambiguous and must error at parse
// time (before any extract_sql call).
let query = "FROM a VISUALISE FROM b DRAW point MAPPING x AS x, y AS y";
let err = SourceTree::new(query).unwrap_err();
let msg = format!("{}", err);
assert!(
msg.contains("two FROM clauses"),
"expected double-FROM rejection, got: {}",
msg
);
}

#[test]
fn test_extract_sql_from_first_skips_string_contents() {
// A FROM inside a string literal in a preceding statement should be
// parsed as part of the string, not mistaken for a bare FROM.
let query =
"CREATE TABLE x AS SELECT 'FROM fake' AS col; FROM x VISUALISE DRAW point MAPPING a AS x, b AS y";
let tree = SourceTree::new(query).unwrap();

let sql = tree.extract_sql().unwrap();
// Injected SELECT * precedes the real FROM, not the string one.
assert!(sql.contains("'FROM fake'"));
assert!(sql.contains("SELECT * FROM x"));
}

#[test]
fn test_extract_sql_from_first_skips_line_comment() {
let query = "-- FROM fake\nFROM real VISUALISE DRAW point MAPPING a AS x, b AS y";
let tree = SourceTree::new(query).unwrap();

let sql = tree.extract_sql().unwrap();
assert!(sql.contains("SELECT * FROM real"));
assert!(!sql.contains("SELECT * -- FROM fake"));
}

#[test]
fn test_extract_sql_visualise_from_file_path_single_quotes() {
let query = "VISUALISE FROM 'mtcars.csv' DRAW point MAPPING mpg AS x, hp AS y";
Expand Down
Loading
Loading