Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for duckdb style "FROM <tbl>" statements #1144

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
62 changes: 40 additions & 22 deletions derive/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,34 @@ use quote::{format_ident, quote, quote_spanned, ToTokens};
use syn::spanned::Spanned;
use syn::{
parse::{Parse, ParseStream},
parse_macro_input, parse_quote, Attribute, Data, DeriveInput,
Fields, GenericParam, Generics, Ident, Index, LitStr, Meta, Token
parse_macro_input, parse_quote, Attribute, Data, DeriveInput, Fields, GenericParam, Generics,
Ident, Index, LitStr, Meta, Token,
};


/// Implementation of `[#derive(VisitMut)]`
#[proc_macro_derive(VisitMut, attributes(visit))]
pub fn derive_visit_mut(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
    // Delegate to the shared derive logic, parameterized for the mutable
    // visitor pair (`VisitMut` / `VisitorMut`, `&mut` receivers).
    derive_visit(
        input,
        &VisitType {
            visit_trait: quote!(VisitMut),
            visitor_trait: quote!(VisitorMut),
            modifier: Some(quote!(mut)),
        },
    )
}

/// Implementation of `[#derive(Visit)]`
#[proc_macro_derive(Visit, attributes(visit))]
pub fn derive_visit_immutable(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
    // Delegate to the shared derive logic, parameterized for the immutable
    // visitor pair (`Visit` / `Visitor`, no `mut` modifier).
    derive_visit(
        input,
        &VisitType {
            visit_trait: quote!(Visit),
            visitor_trait: quote!(Visitor),
            modifier: None,
        },
    )
}

struct VisitType {
Expand All @@ -34,15 +39,16 @@ struct VisitType {
modifier: Option<TokenStream>,
}

fn derive_visit(
input: proc_macro::TokenStream,
visit_type: &VisitType,
) -> proc_macro::TokenStream {
fn derive_visit(input: proc_macro::TokenStream, visit_type: &VisitType) -> proc_macro::TokenStream {
// Parse the input tokens into a syntax tree.
let input = parse_macro_input!(input as DeriveInput);
let name = input.ident;

let VisitType { visit_trait, visitor_trait, modifier } = visit_type;
let VisitType {
visit_trait,
visitor_trait,
modifier,
} = visit_type;

let attributes = Attributes::parse(&input.attrs);
// Add a bound `T: Visit` to every type parameter T.
Expand Down Expand Up @@ -87,7 +93,10 @@ impl Parse for WithIdent {
let mut result = WithIdent { with: None };
let ident = input.parse::<Ident>()?;
if ident != "with" {
return Err(syn::Error::new(ident.span(), "Expected identifier to be `with`"));
return Err(syn::Error::new(
ident.span(),
"Expected identifier to be `with`",
));
}
input.parse::<Token!(=)>()?;
let s = input.parse::<LitStr>()?;
Expand Down Expand Up @@ -131,17 +140,26 @@ impl Attributes {
}

// Add a bound `T: Visit` to every type parameter T.
fn add_trait_bounds(mut generics: Generics, VisitType{visit_trait, ..}: &VisitType) -> Generics {
fn add_trait_bounds(mut generics: Generics, VisitType { visit_trait, .. }: &VisitType) -> Generics {
for param in &mut generics.params {
if let GenericParam::Type(ref mut type_param) = *param {
type_param.bounds.push(parse_quote!(sqlparser::ast::#visit_trait));
type_param
.bounds
.push(parse_quote!(sqlparser::ast::#visit_trait));
}
}
generics
}

// Generate the body of the visit implementation for the given type
fn visit_children(data: &Data, VisitType{visit_trait, modifier, ..}: &VisitType) -> TokenStream {
fn visit_children(
data: &Data,
VisitType {
visit_trait,
modifier,
..
}: &VisitType,
) -> TokenStream {
match data {
Data::Struct(data) => match &data.fields {
Fields::Named(fields) => {
Expand Down
70 changes: 70 additions & 0 deletions src/ast/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,10 +245,80 @@ pub struct Select {
pub named_window: Vec<NamedWindowDefinition>,
/// QUALIFY (Snowflake)
pub qualify: Option<Expr>,
/// Returns `true` if `FROM` appears before `SELECT` in the original SQL,
/// as is the case for DuckDB-style statements such as `FROM t1 SELECT *`
pub from_before_select: bool,
}

impl Select {
    /// Renders this query in DuckDB's `FROM`-first order, i.e.
    /// `FROM <tables> [SELECT ...] <remaining clauses>` rather than the
    /// conventional `SELECT ... FROM ...` order.
    fn display_for_from(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "FROM {}", display_comma_separated(&self.from))?;

        // The projection is optional in this form: a bare `FROM t` is valid,
        // in which case no `SELECT` keyword is emitted at all.
        if !self.projection.is_empty() {
            f.write_str(" SELECT")?;
            if let Some(distinct) = &self.distinct {
                write!(f, " {distinct}")?;
            }
            if let Some(top) = &self.top {
                write!(f, " {top}")?;
            }
            write!(f, " {}", display_comma_separated(&self.projection))?;
        }

        if let Some(into) = &self.into {
            write!(f, " {into}")?;
        }

        // Iterating an empty list writes nothing, so no emptiness guard is
        // needed here.
        for lateral_view in &self.lateral_views {
            write!(f, "{lateral_view}")?;
        }

        if let Some(selection) = &self.selection {
            write!(f, " WHERE {selection}")?;
        }

        match &self.group_by {
            GroupByExpr::All => write!(f, " GROUP BY ALL")?,
            GroupByExpr::Expressions(exprs) if !exprs.is_empty() => {
                write!(f, " GROUP BY {}", display_comma_separated(exprs))?
            }
            // An empty expression list means no GROUP BY clause was present.
            GroupByExpr::Expressions(_) => {}
        }

        if !self.cluster_by.is_empty() {
            write!(f, " CLUSTER BY {}", display_comma_separated(&self.cluster_by))?;
        }
        if !self.distribute_by.is_empty() {
            write!(f, " DISTRIBUTE BY {}", display_comma_separated(&self.distribute_by))?;
        }
        if !self.sort_by.is_empty() {
            write!(f, " SORT BY {}", display_comma_separated(&self.sort_by))?;
        }
        if let Some(having) = &self.having {
            write!(f, " HAVING {having}")?;
        }
        if !self.named_window.is_empty() {
            write!(f, " WINDOW {}", display_comma_separated(&self.named_window))?;
        }
        if let Some(qualify) = &self.qualify {
            write!(f, " QUALIFY {qualify}")?;
        }
        Ok(())
    }
}

impl fmt::Display for Select {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.from_before_select {
return self.display_for_from(f);
}
write!(f, "SELECT")?;
if let Some(ref distinct) = self.distinct {
write!(f, " {distinct}")?;
Expand Down
166 changes: 164 additions & 2 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
use crate::ast::helpers::stmt_create_table::{BigQueryTableConfiguration, CreateTableBuilder};
use crate::ast::*;
use crate::dialect::*;
use crate::keywords::{self, Keyword, ALL_KEYWORDS};

Check warning on line 33 in src/parser/mod.rs

View workflow job for this annotation

GitHub Actions / test (nightly)

the item `keywords` is imported redundantly

Check warning on line 33 in src/parser/mod.rs

View workflow job for this annotation

GitHub Actions / test (nightly)

the item `keywords` is imported redundantly
use crate::tokenizer::*;

mod alter;
Expand Down Expand Up @@ -516,6 +516,12 @@
Keyword::MERGE => Ok(self.parse_merge()?),
// `PRAGMA` is sqlite specific https://www.sqlite.org/pragma.html
Keyword::PRAGMA => Ok(self.parse_pragma()?),

// Duckdb style `FROM <table>` or `FROM tbl SELECT ...`
Keyword::FROM if dialect_of!(self is DuckDbDialect | GenericDialect) => {
self.prev_token();
Ok(Statement::Query(Box::new(self.parse_query()?)))
}
// `INSTALL` is duckdb specific https://duckdb.org/docs/extensions/overview
Keyword::INSTALL if dialect_of!(self is DuckDbDialect | GenericDialect) => {
Ok(self.parse_install()?)
Expand Down Expand Up @@ -6458,10 +6464,8 @@
} else {
None
};

if self.parse_keyword(Keyword::INSERT) {
let insert = self.parse_insert()?;

Ok(Query {
with,
body: Box::new(SetExpr::Insert(insert)),
Expand Down Expand Up @@ -6722,6 +6726,8 @@
SetExpr::Values(self.parse_values(is_mysql)?)
} else if self.parse_keyword(Keyword::TABLE) {
SetExpr::Table(Box::new(self.parse_as_table()?))
} else if self.parse_keyword(Keyword::FROM) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have no idea why, but adding this branch causes SO in the recursion tests such as parse_deeply_nested_parens_hits_recursion_limits

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

even if the branch is never met, the presence of it is causing SO.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe @alamb, @andygrove @nickolay knows why adding this condition causes a SO.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This branch probably makes the stack frame size increase in debug builds as it adds some new local variables. I think we could fix the test by decreasing the recursion limit

SetExpr::Select(Box::new(self.parse_from_select()?))
} else {
return self.expected(
"SELECT, VALUES, or a subquery in the query body",
Expand Down Expand Up @@ -6936,6 +6942,162 @@
having,
named_window: named_windows,
qualify,
from_before_select: false,
})
}

/// Parse a duckdb style `FROM` statement without a select.
/// assuming the initial `FROM` was already consumed
pub fn parse_from_select(&mut self) -> Result<Select, ParserError> {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this seems largely copied from the normal select parsing -- is there any way to refactor the code to reduce the duplication

let from = self.parse_comma_separated(Parser::parse_table_and_joins)?;

let distinct = self.parse_all_or_distinct()?;
let top = if self.parse_keyword(Keyword::TOP) {
Some(self.parse_top()?)
} else {
None
};

// FROM <tbl> SELECT ...
let (selection, projection) = if self.parse_keyword(Keyword::SELECT) {
let projection = self.parse_comma_separated(Parser::parse_select_item)?;
let selection = if self.parse_keyword(Keyword::WHERE) {
Some(self.parse_expr()?)
} else {
None
};
(selection, projection)
// FROM <tbl> WHERE <EXPR>
} else if self.parse_keyword(Keyword::WHERE) {
let selection = Some(self.parse_expr()?);

let projection = self
.maybe_parse(|parser| parser.parse_projection())
.unwrap_or_default();
(selection, projection)
} else {
let selection = None;
let projection = self
.maybe_parse(|parser| parser.parse_projection())
.unwrap_or_default();
(selection, projection)
};

let into = if self.parse_keyword(Keyword::INTO) {
let temporary = self
.parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY])
.is_some();
let unlogged = self.parse_keyword(Keyword::UNLOGGED);
let table = self.parse_keyword(Keyword::TABLE);
let name = self.parse_object_name(false)?;
Some(SelectInto {
temporary,
unlogged,
table,
name,
})
} else {
None
};

// Note that for keywords to be properly handled here, they need to be
// added to `RESERVED_FOR_COLUMN_ALIAS` / `RESERVED_FOR_TABLE_ALIAS`,
// otherwise they may be parsed as an alias as part of the `projection`
// or `from`.

let mut lateral_views = vec![];
loop {
if self.parse_keywords(&[Keyword::LATERAL, Keyword::VIEW]) {
let outer = self.parse_keyword(Keyword::OUTER);
let lateral_view = self.parse_expr()?;
let lateral_view_name = self.parse_object_name(false)?;
let lateral_col_alias = self
.parse_comma_separated(|parser| {
parser.parse_optional_alias(&[
Keyword::WHERE,
Keyword::GROUP,
Keyword::CLUSTER,
Keyword::HAVING,
Keyword::LATERAL,
]) // This couldn't possibly be a bad idea
})?
.into_iter()
.flatten()
.collect();

lateral_views.push(LateralView {
lateral_view,
lateral_view_name,
lateral_col_alias,
outer,
});
} else {
break;
}
}

let group_by = if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) {
if self.parse_keyword(Keyword::ALL) {
GroupByExpr::All
} else {
GroupByExpr::Expressions(self.parse_comma_separated(Parser::parse_group_by_expr)?)
}
} else {
GroupByExpr::Expressions(vec![])
};

let cluster_by = if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) {
self.parse_comma_separated(Parser::parse_expr)?
} else {
vec![]
};

let distribute_by = if self.parse_keywords(&[Keyword::DISTRIBUTE, Keyword::BY]) {
self.parse_comma_separated(Parser::parse_expr)?
} else {
vec![]
};

let sort_by = if self.parse_keywords(&[Keyword::SORT, Keyword::BY]) {
self.parse_comma_separated(Parser::parse_expr)?
} else {
vec![]
};

let having = if self.parse_keyword(Keyword::HAVING) {
Some(self.parse_expr()?)
} else {
None
};

let named_windows = if self.parse_keyword(Keyword::WINDOW) {
self.parse_comma_separated(Parser::parse_named_window)?
} else {
vec![]
};

let qualify = if self.parse_keyword(Keyword::QUALIFY) {
Some(self.parse_expr()?)
} else {
None
};

Ok(Select {
distinct,
top,
projection,
into,
from,
lateral_views,
selection,
group_by,
cluster_by,
distribute_by,
sort_by,
having,
named_window: named_windows,
qualify,
from_before_select: true,
})
}

Expand Down
1 change: 1 addition & 0 deletions tests/sqlparser_clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
use test_utils::*;

use sqlparser::ast::Expr::{BinaryOp, Identifier, MapAccess};
use sqlparser::ast::Ident;

Check warning on line 22 in tests/sqlparser_clickhouse.rs

View workflow job for this annotation

GitHub Actions / test (nightly)

the item `Ident` is imported redundantly
use sqlparser::ast::SelectItem::UnnamedExpr;
use sqlparser::ast::TableFactor::Table;
use sqlparser::ast::*;
Expand Down Expand Up @@ -110,6 +110,7 @@
having: None,
named_window: vec![],
qualify: None,
from_before_select: false
},
select
);
Expand Down