From 172ba42001003bce79708141923ba6100cc7d4f2 Mon Sep 17 00:00:00 2001
From: Alex Kyllo
Date: Sun, 12 Jan 2020 20:20:48 -0800
Subject: [PATCH 001/122] Add support for MSSQL's SELECT TOP N syntax (#150)

Add support for MSSQL SELECT TOP (N) [PERCENT] [WITH TIES] syntax.
---
 src/ast/mod.rs           |  2 +-
 src/ast/query.rs         | 33 +++++++++++++++++++++++++------
 src/dialect/keywords.rs  |  3 ++-
 src/parser.rs            | 31 ++++++++++++++++++++++++++++-
 src/tokenizer.rs         | 23 ++++++++++++++++++++++
 tests/sqlparser_mssql.rs | 42 ++++++++++++++++++++++++++++++++++++++++
 tests/sqlparser_mysql.rs |  2 +-
 7 files changed, 126 insertions(+), 10 deletions(-)

diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index eaf99b31b..2f723f012 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -27,7 +27,7 @@ pub use self::ddl::{
pub use self::operator::{BinaryOperator, UnaryOperator};
pub use self::query::{
    Cte, Fetch, Join, JoinConstraint, JoinOperator, OrderByExpr, Query, Select, SelectItem,
-   SetExpr, SetOperator, TableAlias, TableFactor, TableWithJoins, Values,
+   SetExpr, SetOperator, TableAlias, TableFactor, TableWithJoins, Top, Values,
};
pub use self::value::{DateTimeField, Value};

diff --git a/src/ast/query.rs b/src/ast/query.rs
index 656f7f14b..a5eea141f 100644
--- a/src/ast/query.rs
+++ b/src/ast/query.rs
@@ -114,6 +114,8 @@
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Select {
    pub distinct: bool,
+   /// MSSQL syntax: `TOP (<N>) [ PERCENT ] [ WITH TIES ]`
+   pub top: Option<Top>,
    /// projection expressions
    pub projection: Vec<SelectItem>,
    /// FROM
@@ -128,12 +130,11 @@

impl fmt::Display for Select {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-       write!(
-           f,
-           "SELECT{} {}",
-           if self.distinct { " DISTINCT" } else { "" },
-           display_comma_separated(&self.projection)
-       )?;
+       write!(f, "SELECT{}", if self.distinct { " DISTINCT" } else { "" })?;
+       if let Some(ref top) = self.top {
+           write!(f, " {}", top)?;
+       }
+       write!(f, " {}", display_comma_separated(&self.projection))?;
        if !self.from.is_empty() {
            write!(f, " FROM {}", display_comma_separated(&self.from))?;
        }
@@ -408,6 +409,26 @@ impl fmt::Display for Fetch {
    }
}

+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct Top {
+    /// SQL semantic equivalent of LIMIT but with same structure as FETCH.
+    pub with_ties: bool,
+    pub percent: bool,
+    pub quantity: Option<Expr>,
+}
+
+impl fmt::Display for Top {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let extension = if self.with_ties { " WITH TIES" } else { "" };
+        if let Some(ref quantity) = self.quantity {
+            let percent = if self.percent { " PERCENT" } else { "" };
+            write!(f, "TOP ({}){}{}", quantity, percent, extension)
+        } else {
+            write!(f, "TOP{}", extension)
+        }
+    }
+}
+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Values(pub Vec<Vec<Expr>>);
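[Illustration, not part of the patch: how the new `Top` node serializes, assuming
it is compiled against this crate's `sqlparser::ast` exports. The helper name
`top_to_sql` and the literal values are hypothetical; at this point in the
history `Value::Number` wraps the digits as a plain `String`.]

    use sqlparser::ast::{Expr, Top, Value};

    fn top_to_sql() {
        let top = Top {
            with_ties: false,
            percent: true,
            quantity: Some(Expr::Value(Value::Number("5".to_string()))),
        };
        // A quantity is always re-serialized in the parenthesized form:
        assert_eq!(top.to_string(), "TOP (5) PERCENT");
        // Without a quantity only the optional suffix remains:
        let bare = Top { with_ties: true, percent: false, quantity: None };
        assert_eq!(bare.to_string(), "TOP WITH TIES");
    }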
diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs
index c083c0692..9795f2af3 100644
--- a/src/dialect/keywords.rs
+++ b/src/dialect/keywords.rs
@@ -374,6 +374,7 @@ define_keywords!(
    TIMEZONE_HOUR,
    TIMEZONE_MINUTE,
    TO,
+   TOP,
    TRAILING,
    TRANSACTION,
    TRANSLATE,
@@ -426,7 +427,7 @@ define_keywords!(
/// can be parsed unambiguously without looking ahead.
pub const RESERVED_FOR_TABLE_ALIAS: &[&str] = &[
    // Reserved as both a table and a column alias:
-   WITH, SELECT, WHERE, GROUP, HAVING, ORDER, LIMIT, OFFSET, FETCH, UNION, EXCEPT, INTERSECT,
+   WITH, SELECT, WHERE, GROUP, HAVING, ORDER, TOP, LIMIT, OFFSET, FETCH, UNION, EXCEPT, INTERSECT,
    // Reserved only as a table alias in the `FROM`/`JOIN` clauses:
    ON, JOIN, INNER, CROSS, FULL, LEFT, RIGHT, NATURAL, USING,
    // for MSSQL-specific OUTER APPLY (seems reserved in most dialects)

diff --git a/src/parser.rs b/src/parser.rs
index cbdcaba09..c9e32ed3b 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -783,7 +783,6 @@ impl Parser {
    }

    /// Bail out if the current token is not one of the expected keywords, or consume it if it is
-   #[must_use]
    pub fn expect_one_of_keywords(
        &mut self,
        keywords: &[&'static str],
@@ -1561,6 +1560,13 @@ impl Parser {
        if all && distinct {
            return parser_err!("Cannot specify both ALL and DISTINCT in SELECT");
        }
+
+       let top = if self.parse_keyword("TOP") {
+           Some(self.parse_top()?)
+       } else {
+           None
+       };
+
        let projection = self.parse_comma_separated(Parser::parse_select_item)?;

        // Note that for keywords to be properly handled here, they need to be
@@ -1594,6 +1600,7 @@ impl Parser {

        Ok(Select {
            distinct,
+           top,
            projection,
            from,
            selection,
@@ -1940,6 +1947,28 @@ impl Parser {
        Ok(OrderByExpr { expr, asc })
    }

+   /// Parse a TOP clause, MSSQL equivalent of LIMIT,
+   /// that follows after SELECT [DISTINCT].
+   pub fn parse_top(&mut self) -> Result<Top, ParserError> {
+       let quantity = if self.consume_token(&Token::LParen) {
+           let quantity = self.parse_expr()?;
+           self.expect_token(&Token::RParen)?;
+           Some(quantity)
+       } else {
+           Some(Expr::Value(self.parse_number_value()?))
+       };
+
+       let percent = self.parse_keyword("PERCENT");
+
+       let with_ties = self.parse_keywords(vec!["WITH", "TIES"]);
+
+       Ok(Top {
+           with_ties,
+           percent,
+           quantity,
+       })
+   }
+
    /// Parse a LIMIT clause
    pub fn parse_limit(&mut self) -> Result<Option<Expr>, ParserError> {
        if self.parse_keyword("ALL") {

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 62d534895..96c9535ea 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -522,6 +522,7 @@ fn peeking_take_while(
#[cfg(test)]
mod tests {
    use super::super::dialect::GenericDialect;
+   use super::super::dialect::MsSqlDialect;
    use super::*;

    #[test]
@@ -782,6 +783,28 @@ mod tests {
        compare(expected, tokens);
    }

+   #[test]
+   fn tokenize_mssql_top() {
+       let sql = "SELECT TOP 5 [bar] FROM foo";
+       let dialect = MsSqlDialect {};
+       let mut tokenizer = Tokenizer::new(&dialect, sql);
+       let tokens = tokenizer.tokenize().unwrap();
+       let expected = vec![
+           Token::make_keyword("SELECT"),
+           Token::Whitespace(Whitespace::Space),
+           Token::make_keyword("TOP"),
+           Token::Whitespace(Whitespace::Space),
+           Token::Number(String::from("5")),
+           Token::Whitespace(Whitespace::Space),
+           Token::make_word("bar", Some('[')),
+           Token::Whitespace(Whitespace::Space),
+           Token::make_keyword("FROM"),
+           Token::Whitespace(Whitespace::Space),
+           Token::make_word("foo", None),
+       ];
+       compare(expected, tokens);
+   }
+
    fn compare(expected: Vec<Token>, actual: Vec<Token>) {
        //println!("------------------------------");
        //println!("tokens = {:?}", actual);

diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs
index b5170e208..2774d43ef 100644
--- a/tests/sqlparser_mssql.rs
+++ b/tests/sqlparser_mssql.rs
@@ -68,6 +68,48 @@ fn parse_mssql_apply_join() {
    );
}

+#[test]
+fn parse_mssql_top_paren() {
+    let sql = "SELECT TOP (5) * FROM foo";
+    let select = ms_and_generic().verified_only_select(sql);
+    let top = select.top.unwrap();
+    assert_eq!(Some(Expr::Value(number("5"))), top.quantity);
+    assert!(!top.percent);
+}
+
+#[test]
+fn parse_mssql_top_percent() {
+    let sql = "SELECT TOP (5) PERCENT * FROM foo";
+    let select = ms_and_generic().verified_only_select(sql);
+    let top = select.top.unwrap();
+    assert_eq!(Some(Expr::Value(number("5"))), top.quantity);
+    assert!(top.percent);
+}
+
+#[test]
+fn parse_mssql_top_with_ties() {
+    let sql = "SELECT TOP (5) WITH TIES * FROM foo";
+    let select = ms_and_generic().verified_only_select(sql);
+    let top = select.top.unwrap();
+    assert_eq!(Some(Expr::Value(number("5"))), top.quantity);
+    assert!(top.with_ties);
+}
+
+#[test]
+fn parse_mssql_top_percent_with_ties() {
+    let sql = "SELECT TOP (10) PERCENT WITH TIES * FROM foo";
+    let select = ms_and_generic().verified_only_select(sql);
+    let top = select.top.unwrap();
+    assert_eq!(Some(Expr::Value(number("10"))), top.quantity);
+    assert!(top.percent);
+}
+
+#[test]
+fn parse_mssql_top() {
+    let sql = "SELECT TOP 5 bar, baz FROM foo";
+    let _ = ms_and_generic().one_statement_parses_to(sql, "SELECT TOP (5) bar, baz FROM foo");
+}
+
fn ms() -> TestedDialects {
    TestedDialects {
        dialects: vec![Box::new(MsSqlDialect {})],

diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs
index ce9d0053b..cc6433322 100644
--- a/tests/sqlparser_mysql.rs
+++ b/tests/sqlparser_mysql.rs
@@ -77,7 +77,7 @@ fn parse_show_columns() {
        Statement::ShowColumns {
            extended: false,
            full: false,
-           table_name: table_name.clone(),
+           table_name: table_name,
            filter: Some(ShowStatementFilter::Where(
                mysql_and_generic().verified_expr("1 = 2")
            )),
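[An end-to-end sketch of the normalization the tests above rely on — hedged,
since the exact `Parser::parse_sql` signature has varied across releases (it is
assumed here to take an owned `String`):]

    use sqlparser::dialect::MsSqlDialect;
    use sqlparser::parser::Parser;

    fn normalize_top() {
        let sql = "SELECT TOP 5 bar, baz FROM foo".to_string();
        let stmts = Parser::parse_sql(&MsSqlDialect {}, sql).unwrap();
        // `TOP 5` and `TOP (5)` produce the same AST; serialization
        // always emits the parenthesized variant.
        assert_eq!(stmts[0].to_string(), "SELECT TOP (5) bar, baz FROM foo");
    }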
From 3255fd3ea8b5f8e9c63c1825fdcd2fc1fb30cc22 Mon Sep 17 00:00:00 2001
From: Eyal Leshem
Date: Sat, 4 Apr 2020 23:21:36 +0300
Subject: [PATCH 002/122] Add support for table_name inside parentheses

---
 src/parser.rs             | 15 +++++----------
 tests/sqlparser_common.rs | 38 ++++++++++++++++++++++++++++++--------
 2 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index c9e32ed3b..e988f9c09 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1771,6 +1771,7 @@ impl Parser {
    // ^ ^ ^ ^
    // | | | |
    // | | | |
+   // | | | |
    // | | | (4) belongs to a SetExpr::Query inside the subquery
    // | | (3) starts a derived table (subquery)
    // | (2) starts a nested join

            // Ignore the error and back up to where we were before.
            // Either we'll be able to parse a valid nested join, or
            // we won't, and we'll return that error instead.
+           //
+           // Even the SQL spec prohibits derived tables and bare
+           // tables from appearing alone in parentheses, we allowed it
+           // as some Db's allowed that (snowflake as example)
            self.index = index;
            let table_and_joins = self.parse_table_and_joins()?;
-           match table_and_joins.relation {
-               TableFactor::NestedJoin { .. } => (),
-               _ => {
-                   if table_and_joins.joins.is_empty() {
-                       // The SQL spec prohibits derived tables and bare
-                       // tables from appearing alone in parentheses.
-                       self.expected("joined table", self.peek_token())?
- } - } - } self.expect_token(&Token::RParen)?; Ok(TableFactor::NestedJoin(Box::new(table_and_joins))) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 71b11f3b7..939673dcd 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1806,11 +1806,19 @@ fn parse_join_nesting() { vec![join(nest!(nest!(nest!(table("b"), table("c")))))] ); - let res = parse_sql_statements("SELECT * FROM (a NATURAL JOIN (b))"); - assert_eq!( - ParserError::ParserError("Expected joined table, found: )".to_string()), - res.unwrap_err() - ); + // Parenthesized table names are non-standard, but supported in Snowflake SQL + let sql = "SELECT * FROM (a NATURAL JOIN (b))"; + let select = verified_only_select(sql); + let from = only(select.from); + + assert_eq!(from.relation, nest!(table("a"), nest!(table("b")))); + + // Double parentheses around table names are non-standard, but supported in Snowflake SQL + let sql = "SELECT * FROM (a NATURAL JOIN ((b)))"; + let select = verified_only_select(sql); + let from = only(select.from); + + assert_eq!(from.relation, nest!(table("a"), nest!(nest!(table("b"))))); } #[test] @@ -1953,10 +1961,24 @@ fn parse_derived_tables() { })) ); - let res = parse_sql_statements("SELECT * FROM ((SELECT 1) AS t)"); + // Nesting a subquery in parentheses is non-standard, but supported in Snowflake SQL + let sql = "SELECT * FROM ((SELECT 1) AS t)"; + let select = verified_only_select(sql); + let from = only(select.from); + assert_eq!( - ParserError::ParserError("Expected joined table, found: )".to_string()), - res.unwrap_err() + from.relation, + TableFactor::NestedJoin(Box::new(TableWithJoins { + relation: TableFactor::Derived { + lateral: false, + subquery: Box::new(verified_query("SELECT 1")), + alias: Some(TableAlias { + name: "t".into(), + columns: vec![], + }) + }, + joins: Vec::new(), + })) ); } From 4ce0eb11aee27f2fd8ae4c31fe982a8f0efca8c1 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Tue, 14 Apr 2020 16:56:16 +0300 Subject: [PATCH 003/122] Fix a new clippy lint (`.nth(0)`) to unbreak CI "iter.next() is equivalent to iter.nth(0), as they both consume the next element, but is more readable." 
https://rust-lang.github.io/rust-clippy/master/index.html#iter_nth_zero --- examples/cli.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cli.rs b/examples/cli.rs index 77a0b5014..d0ba9a578 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -40,7 +40,7 @@ fn main() { println!("Parsing from file '{}' using {:?}", &filename, dialect); let contents = fs::read_to_string(&filename) .unwrap_or_else(|_| panic!("Unable to read the file {}", &filename)); - let without_bom = if contents.chars().nth(0).unwrap() as u64 != 0xfeff { + let without_bom = if contents.chars().next().unwrap() as u64 != 0xfeff { contents.as_str() } else { let mut chars = contents.chars(); From dcc624c56128c3d6cba24812cf0747195308309a Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Tue, 14 Apr 2020 19:03:39 +0300 Subject: [PATCH 004/122] Make CI handle missing rustfmt in the nightly --- .travis.yml | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index e90fe8400..425a48ef5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,6 +23,16 @@ rust: - stable before_script: + # Travis installs rust with a non-default "minimal" profile, and advises us + # to add clippy manually: + - rustup component add clippy + # Unfortunately, this method often breaks on the nightly channel, where the + # most recent build might not have all the optional components. + # We explicitly specify `--profile default` to obtain the most recent nightly + # that has rustfmt (we don't care if it's a week old, as we need it only for + # an experimental flag): + - rustup toolchain install nightly --profile default + - pip install 'travis-cargo<0.2' --user && export PATH=$HOME/.local/bin:$PATH - export PATH=$HOME/.cargo/bin:$PATH # `cargo install` fails if the specified binary is already installed, and @@ -33,13 +43,9 @@ before_script: # rebuilds from scratch, ignoring the cache entirely. # # [0]: https://github.com/rust-lang/cargo/issues/2082 - - cargo install cargo-update || echo "cargo-update already installed" - - cargo install cargo-travis || echo "cargo-travis already installed" + - cargo install cargo-update || echo "cargo-update already installed" # for `cargo install-update` + - cargo install cargo-travis || echo "cargo-travis already installed" # for `cargo coveralls` - cargo install-update -a # updates cargo-travis, if the cached version is outdated - - rustup component add clippy - # The license_template_path setting we use to verify copyright headers is - # only available on the nightly rustfmt. - - rustup toolchain install nightly && rustup component add --toolchain nightly rustfmt script: # Clippy must be run first, as its lints are only triggered during @@ -50,6 +56,8 @@ script: - travis-cargo build - travis-cargo test - travis-cargo test -- all-features + # The license_template_path setting we use to verify copyright headers is + # only available on the nightly rustfmt. 
  - cargo +nightly fmt -- --check --config-path <(echo 'license_template_path = "HEADER"')

after_success:

From baacc956ea26ddb7b8dea4cb9ccd9aac5532e171 Mon Sep 17 00:00:00 2001
From: Alex Dukhno
Date: Sun, 19 Apr 2020 13:07:00 +0300
Subject: [PATCH 005/122] derive default for GenericDialect

---
 src/dialect/generic.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs
index 50054069e..104d3a9a3 100644
--- a/src/dialect/generic.rs
+++ b/src/dialect/generic.rs
@@ -12,8 +12,8 @@

use crate::dialect::Dialect;

-#[derive(Debug)]
-pub struct GenericDialect {}
+#[derive(Debug, Default)]
+pub struct GenericDialect;

impl Dialect for GenericDialect {
    fn is_identifier_start(&self, ch: char) -> bool {

From 05a29212ffbf0704a620dfef1b850089d2efa1ab Mon Sep 17 00:00:00 2001
From: Nickolay Ponomarev
Date: Mon, 20 Apr 2020 04:07:21 +0300
Subject: [PATCH 006/122] Update comments (follow-up to PR #155)

https://github.com/andygrove/sqlparser-rs/pull/155
---
 src/ast/query.rs |  9 +++++----
 src/parser.rs    | 26 +++++++++++---------------
 2 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/src/ast/query.rs b/src/ast/query.rs
index a5eea141f..3588257e7 100644
--- a/src/ast/query.rs
+++ b/src/ast/query.rs
@@ -225,10 +225,11 @@ pub enum TableFactor {
        subquery: Box<Query>,
        alias: Option<TableAlias>,
    },
-   /// Represents a parenthesized join expression, such as
-   /// `(foo <JOIN> bar [ <JOIN> baz ... ])`.
-   /// The inner `TableWithJoins` can have no joins only if its
-   /// `relation` is itself a `TableFactor::NestedJoin`.
+   /// Represents a parenthesized table factor. The SQL spec only allows a
+   /// join expression (`(foo <JOIN> bar [ <JOIN> baz ... ])`) to be nested,
+   /// possibly several times, but the parser also accepts the non-standard
+   /// nesting of bare tables (`table_with_joins.joins.is_empty()`), so the
+   /// name `NestedJoin` is a bit of a misnomer.
    NestedJoin(Box<TableWithJoins>),
}
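[A hedged sketch of the non-standard nesting described in the new doc comment,
assuming the crate's public `Parser::parse_sql` entry point; the function name
`bare_table_in_parens` is illustrative:]

    use sqlparser::ast::{SetExpr, Statement, TableFactor};
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    fn bare_table_in_parens() {
        // Standard SQL would reject the parenthesized bare table `(b)`;
        // Snowflake (and, since #155, this parser) accepts it.
        let sql = "SELECT * FROM (a NATURAL JOIN (b))".to_string();
        let stmts = Parser::parse_sql(&GenericDialect {}, sql).unwrap();
        if let Statement::Query(q) = &stmts[0] {
            if let SetExpr::Select(s) = &q.body {
                // The outer parentheses become a `TableFactor::NestedJoin`;
                // the inner `(b)` is a nested join with an empty `joins` list.
                assert!(matches!(s.from[0].relation, TableFactor::NestedJoin(_)));
            }
        }
    }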
diff --git a/src/parser.rs b/src/parser.rs
index e988f9c09..cdaf8989d 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1771,7 +1771,6 @@ impl Parser {
    // ^ ^ ^ ^
    // | | | |
    // | | | |
-   // | | | |
    // | | | (4) belongs to a SetExpr::Query inside the subquery
    // | | (3) starts a derived table (subquery)
    // | (2) starts a nested join

            // case (3), and the next token would be `NATURAL`.
            Ok(table_factor) => Ok(table_factor),
            Err(_) => {
-               // The '(' we've recently consumed does not start a derived
-               // table. For valid input this can happen either when the
-               // token following the paren can't start a query (e.g. `foo`
-               // in `FROM (foo NATURAL JOIN bar)`, or when the '(' we've
-               // consumed is followed by another '(' that starts a
-               // derived table, like (3), or another nested join (2).
-               //
-               // Ignore the error and back up to where we were before.
-               // Either we'll be able to parse a valid nested join, or
-               // we won't, and we'll return that error instead.
-               //
-               // Even the SQL spec prohibits derived tables and bare
-               // tables from appearing alone in parentheses, we allowed it
-               // as some Db's allowed that (snowflake as example)
+               // A parsing error from `parse_derived_table_factor` indicates that
+               // the '(' we've recently consumed does not start a derived table
+               // (cases 1, 2, or 4). Ignore the error and back up to where we
+               // were before - right after the opening '('.
                self.index = index;
+
+               // Inside the parentheses we expect to find a table factor
+               // followed by some joins or another level of nesting.
                let table_and_joins = self.parse_table_and_joins()?;
                self.expect_token(&Token::RParen)?;
+
+               // The SQL spec prohibits derived and bare tables from appearing
+               // alone in parentheses. We don't enforce this as some databases
+               // (e.g. Snowflake) allow such syntax.
+
                Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
            }

From c0b0b5924d4d06359e3419b75fab03e7704a7edd Mon Sep 17 00:00:00 2001
From: Matt Jibson
Date: Mon, 13 Apr 2020 17:59:10 -0600
Subject: [PATCH 007/122] Add support for OFFSET with the ROWS keyword

MySQL doesn't support the ROWS part of OFFSET. Teach the parser to
remember which variant it saw, including just ROW.
---
 src/ast/mod.rs            |  4 +--
 src/ast/query.rs          | 35 ++++++++++++++++++--
 src/parser.rs             | 12 +++++--
 tests/sqlparser_common.rs | 70 +++++++++++++++++++++++++++++----------
 4 files changed, 96 insertions(+), 25 deletions(-)

diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 2f723f012..3ace38c02 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -26,8 +26,8 @@ pub use self::ddl::{
};
pub use self::operator::{BinaryOperator, UnaryOperator};
pub use self::query::{
-   Cte, Fetch, Join, JoinConstraint, JoinOperator, OrderByExpr, Query, Select, SelectItem,
-   SetExpr, SetOperator, TableAlias, TableFactor, TableWithJoins, Top, Values,
+   Cte, Fetch, Join, JoinConstraint, JoinOperator, Offset, OffsetRows, OrderByExpr, Query, Select,
+   SelectItem, SetExpr, SetOperator, TableAlias, TableFactor, TableWithJoins, Top, Values,
};
pub use self::value::{DateTimeField, Value};

diff --git a/src/ast/query.rs b/src/ast/query.rs
index 3588257e7..433421985 100644
--- a/src/ast/query.rs
+++ b/src/ast/query.rs
@@ -24,8 +24,8 @@ pub struct Query {
    pub order_by: Vec<OrderByExpr>,
    /// `LIMIT { <N> | ALL }`
    pub limit: Option<Expr>,
-   /// `OFFSET <N> { ROW | ROWS }`
-   pub offset: Option<Expr>,
+   /// `OFFSET <N> [ { ROW | ROWS } ]`
+   pub offset: Option<Offset>,
    /// `FETCH { FIRST | NEXT } <N> [ PERCENT ] { ROW | ROWS } | { ONLY | WITH TIES }`
    pub fetch: Option<Fetch>,
}
@@ -43,7 +43,7 @@ impl fmt::Display for Query {
            write!(f, " LIMIT {}", limit)?;
        }
        if let Some(ref offset) = self.offset {
-           write!(f, " OFFSET {} ROWS", offset)?;
+           write!(f, " {}", offset)?;
        }
        if let Some(ref fetch) = self.fetch {
            write!(f, " {}", fetch)?;
        }
@@ -391,6 +391,35 @@ impl fmt::Display for OrderByExpr {
    }
}

+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct Offset {
+    pub value: Expr,
+    pub rows: OffsetRows,
+}
+
+impl fmt::Display for Offset {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "OFFSET {}{}", self.value, self.rows)
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum OffsetRows {
+    None,
+    Row,
+    Rows,
+}
+
+impl fmt::Display for OffsetRows {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            OffsetRows::None => Ok(()),
+            OffsetRows::Row => write!(f, " ROW"),
+            OffsetRows::Rows => write!(f, " ROWS"),
+        }
+    }
+}
+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Fetch {
    pub with_ties: bool,

diff --git a/src/parser.rs b/src/parser.rs
index cdaf8989d..3b61ad112 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1970,10 +1970,16 @@ impl Parser {
    }

    /// Parse an OFFSET clause
-   pub fn parse_offset(&mut self) -> Result<Expr, ParserError> {
+   pub fn parse_offset(&mut self) -> Result<Offset, ParserError> {
        let value = Expr::Value(self.parse_number_value()?);
-       self.expect_one_of_keywords(&["ROW", "ROWS"])?;
-       Ok(value)
+       let rows = if self.parse_keyword("ROW") {
+           OffsetRows::Row
+       } else if self.parse_keyword("ROWS") {
+           OffsetRows::Rows
+       } else {
+           OffsetRows::None
+       };
+       Ok(Offset { value, rows })
    }

    /// Parse a FETCH clause
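[Illustration, not part of the patch: the three `OffsetRows` variants and the
SQL each one prints, using only the AST types added above; the values are
hypothetical:]

    use sqlparser::ast::{Expr, Offset, OffsetRows, Value};

    fn offset_variants() {
        let two = || Expr::Value(Value::Number("2".to_string()));
        // MySQL-style, no trailing keyword:
        assert_eq!(Offset { value: two(), rows: OffsetRows::None }.to_string(), "OFFSET 2");
        // The standard ROW/ROWS spellings survive a round trip unchanged:
        assert_eq!(Offset { value: two(), rows: OffsetRows::Row }.to_string(), "OFFSET 2 ROW");
        assert_eq!(Offset { value: two(), rows: OffsetRows::Rows }.to_string(), "OFFSET 2 ROWS");
    }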
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 939673dcd..fe4013fac 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -2265,34 +2265,52 @@ fn parse_invalid_subquery_without_parens() {

#[test]
fn parse_offset() {
+   let expect = Some(Offset {
+       value: Expr::Value(number("2")),
+       rows: OffsetRows::Rows,
+   });
    let ast = verified_query("SELECT foo FROM bar OFFSET 2 ROWS");
-   assert_eq!(ast.offset, Some(Expr::Value(number("2"))));
+   assert_eq!(ast.offset, expect);
    let ast = verified_query("SELECT foo FROM bar WHERE foo = 4 OFFSET 2 ROWS");
-   assert_eq!(ast.offset, Some(Expr::Value(number("2"))));
+   assert_eq!(ast.offset, expect);
    let ast = verified_query("SELECT foo FROM bar ORDER BY baz OFFSET 2 ROWS");
-   assert_eq!(ast.offset, Some(Expr::Value(number("2"))));
+   assert_eq!(ast.offset, expect);
    let ast = verified_query("SELECT foo FROM bar WHERE foo = 4 ORDER BY baz OFFSET 2 ROWS");
-   assert_eq!(ast.offset, Some(Expr::Value(number("2"))));
+   assert_eq!(ast.offset, expect);
    let ast = verified_query("SELECT foo FROM (SELECT * FROM bar OFFSET 2 ROWS) OFFSET 2 ROWS");
-   assert_eq!(ast.offset, Some(Expr::Value(number("2"))));
+   assert_eq!(ast.offset, expect);
    match ast.body {
        SetExpr::Select(s) => match only(s.from).relation {
            TableFactor::Derived { subquery, .. } => {
-               assert_eq!(subquery.offset, Some(Expr::Value(number("2"))));
+               assert_eq!(subquery.offset, expect);
            }
            _ => panic!("Test broke"),
        },
        _ => panic!("Test broke"),
    }
    let ast = verified_query("SELECT 'foo' OFFSET 0 ROWS");
-   assert_eq!(ast.offset, Some(Expr::Value(number("0"))));
-}
-
-#[test]
-fn parse_singular_row_offset() {
-   one_statement_parses_to(
-       "SELECT foo FROM bar OFFSET 1 ROW",
-       "SELECT foo FROM bar OFFSET 1 ROWS",
+   assert_eq!(
+       ast.offset,
+       Some(Offset {
+           value: Expr::Value(number("0")),
+           rows: OffsetRows::Rows,
+       })
+   );
+   let ast = verified_query("SELECT 'foo' OFFSET 1 ROW");
+   assert_eq!(
+       ast.offset,
+       Some(Offset {
+           value: Expr::Value(number("1")),
+           rows: OffsetRows::Row,
+       })
+   );
+   let ast = verified_query("SELECT 'foo' OFFSET 1");
+   assert_eq!(
+       ast.offset,
+       Some(Offset {
+           value: Expr::Value(number("1")),
+           rows: OffsetRows::None,
+       })
    );
}
@@ -2343,7 +2361,13 @@ fn parse_fetch() {
    let ast = verified_query(
        "SELECT foo FROM bar WHERE foo = 4 ORDER BY baz OFFSET 2 ROWS FETCH FIRST 2 ROWS ONLY",
    );
-   assert_eq!(ast.offset, Some(Expr::Value(number("2"))));
+   assert_eq!(
+       ast.offset,
+       Some(Offset {
+           value: Expr::Value(number("2")),
+           rows: OffsetRows::Rows,
+       })
+   );
    assert_eq!(ast.fetch, fetch_first_two_rows_only);
    let ast = verified_query(
        "SELECT foo FROM (SELECT * FROM bar FETCH FIRST 2 ROWS ONLY) FETCH FIRST 2 ROWS ONLY",
@@ -2359,12 +2383,24 @@ fn parse_fetch() {
        _ => panic!("Test broke"),
    }
    let ast = verified_query("SELECT foo FROM (SELECT * FROM bar OFFSET 2 ROWS FETCH FIRST 2 ROWS ONLY) OFFSET 2 ROWS FETCH FIRST 2 ROWS ONLY");
-   assert_eq!(ast.offset, Some(Expr::Value(number("2"))));
+   assert_eq!(
+       ast.offset,
+       Some(Offset {
+           value: Expr::Value(number("2")),
+           rows: OffsetRows::Rows,
+       })
+   );
    assert_eq!(ast.fetch, fetch_first_two_rows_only);
    match ast.body {
        SetExpr::Select(s) => match only(s.from).relation {
            TableFactor::Derived { subquery, .. } => {
-               assert_eq!(subquery.offset, Some(Expr::Value(number("2"))));
+               assert_eq!(
+                   subquery.offset,
+                   Some(Offset {
+                       value: Expr::Value(number("2")),
+                       rows: OffsetRows::Rows,
+                   })
+               );
                assert_eq!(subquery.fetch, fetch_first_two_rows_only);
            }
            _ => panic!("Test broke"),
From 06865113d74fe39451f0a4cee3c18f5675392e48 Mon Sep 17 00:00:00 2001
From: Nickolay Ponomarev
Date: Mon, 20 Apr 2020 05:43:57 +0300
Subject: [PATCH 008/122] Update comments (follow-up to PR #158)

---
 src/ast/query.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/ast/query.rs b/src/ast/query.rs
index 433421985..a26ba2655 100644
--- a/src/ast/query.rs
+++ b/src/ast/query.rs
@@ -403,8 +403,10 @@ impl fmt::Display for Offset {
    }
}

+/// Stores the keyword after `OFFSET <number>`
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum OffsetRows {
+   /// Omitting ROW/ROWS is non-standard MySQL quirk.
    None,
    Row,
    Rows,

From 5aacc5ebcd5c1345cc7f2597ddfdfc5a926312e6 Mon Sep 17 00:00:00 2001
From: Alex Dukhno <5074607+alex-dukhno@users.noreply.github.com>
Date: Tue, 21 Apr 2020 16:28:02 +0300
Subject: [PATCH 009/122] Implement CREATE TABLE IF NOT EXISTS (#163)

A non-standard feature supported at least by Postgres
https://www.postgresql.org/docs/12/sql-createtable.html
---
 src/ast/mod.rs              |  5 +++-
 src/parser.rs               |  3 +++
 tests/sqlparser_common.rs   |  3 +++
 tests/sqlparser_postgres.rs | 52 +++++++++++++++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 3ace38c02..98637e697 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -471,6 +471,7 @@ pub enum Statement {
        columns: Vec<ColumnDef>,
        constraints: Vec<TableConstraint>,
        with_options: Vec<SqlOption>,
+       if_not_exists: bool,
        external: bool,
        file_format: Option<FileFormat>,
        location: Option<String>,
@@ -623,14 +624,16 @@ impl fmt::Display for Statement {
                columns,
                constraints,
                with_options,
+               if_not_exists,
                external,
                file_format,
                location,
            } => {
                write!(
                    f,
-                   "CREATE {}TABLE {} ({}",
+                   "CREATE {}TABLE {}{} ({}",
                    if *external { "EXTERNAL " } else { "" },
+                   if *if_not_exists { "IF NOT EXISTS " } else { "" },
                    name,
                    display_comma_separated(columns)
                )?;

diff --git a/src/parser.rs b/src/parser.rs
index 3b61ad112..0939b7e39 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -880,6 +880,7 @@ impl Parser {
            columns,
            constraints,
            with_options: vec![],
+           if_not_exists: false,
            external: true,
            file_format: Some(file_format),
            location: Some(location),
@@ -932,6 +933,7 @@ impl Parser {
    }

    pub fn parse_create_table(&mut self) -> Result<Statement, ParserError> {
+       let if_not_exists = self.parse_keywords(vec!["IF", "NOT", "EXISTS"]);
        let table_name = self.parse_object_name()?;
        // parse optional column list (schema)
        let (columns, constraints) = self.parse_columns()?;
@@ -942,6 +944,7 @@ impl Parser {
            columns,
            constraints,
            with_options,
+           if_not_exists,
            external: false,
            file_format: None,
            location: None,

diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index fe4013fac..c62fc86d7 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -909,6 +909,7 @@ fn parse_create_table() {
            columns,
            constraints,
            with_options,
+           if_not_exists: false,
            external: false,
            file_format: None,
            location: None,
@@ -1045,6 +1046,7 @@ fn parse_create_external_table() {
            columns,
            constraints,
            with_options,
+           if_not_exists,
            external,
            file_format,
            location,
@@ -1086,6 +1088,7 @@ fn parse_create_external_table() {

            assert_eq!("/tmp/example.csv", location.unwrap());
            assert_eq!(with_options, vec![]);
+           assert!(!if_not_exists);
        }
        _ => unreachable!(),
    }

diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs
index a903ced64..bd467dc74 100644
--- a/tests/sqlparser_postgres.rs
+++ b/tests/sqlparser_postgres.rs
@@ -39,6 +39,7 @@ fn parse_create_table_with_defaults() {
            columns,
            constraints,
            with_options,
+           if_not_exists: false,
            external: false,
            file_format: None,
            location: None,
@@ -225,6 +226,57 @@ fn parse_create_table_with_inherit() {
    pg().verified_stmt(sql);
}

+#[test]
+fn parse_create_table_if_not_exists() {
+    let sql = "CREATE TABLE IF NOT EXISTS uk_cities ()";
+    let ast =
+        pg_and_generic().one_statement_parses_to(sql, "CREATE TABLE IF NOT EXISTS uk_cities ()");
+    match ast {
+        Statement::CreateTable {
+            name,
+            columns: _columns,
+            constraints,
+            with_options,
+            if_not_exists: true,
+            external: false,
+            file_format: None,
+            location: None,
+        } => {
+            assert_eq!("uk_cities", name.to_string());
+            assert!(constraints.is_empty());
+            assert_eq!(with_options, vec![]);
+        }
+        _ => unreachable!(),
+    }
+}
+
+#[test]
+fn parse_bad_if_not_exists() {
+    let res = pg().parse_sql_statements("CREATE TABLE NOT EXISTS uk_cities ()");
+    assert_eq!(
+        ParserError::ParserError("Expected end of statement, found: EXISTS".to_string()),
+        res.unwrap_err()
+    );
+
+    let res = pg().parse_sql_statements("CREATE TABLE IF EXISTS uk_cities ()");
+    assert_eq!(
+        ParserError::ParserError("Expected end of statement, found: EXISTS".to_string()),
+        res.unwrap_err()
+    );
+
+    let res = pg().parse_sql_statements("CREATE TABLE IF uk_cities ()");
+    assert_eq!(
+        ParserError::ParserError("Expected end of statement, found: uk_cities".to_string()),
+        res.unwrap_err()
+    );
+
+    let res = pg().parse_sql_statements("CREATE TABLE IF NOT uk_cities ()");
+    assert_eq!(
+        ParserError::ParserError("Expected end of statement, found: NOT".to_string()),
+        res.unwrap_err()
+    );
+}
+
#[test]
fn parse_copy_example() {
    let sql = r#"COPY public.actor (actor_id, first_name, last_name, last_update, value) FROM stdin;
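[A hedged usage sketch of the new flag, again assuming `Parser::parse_sql`
takes an owned `String` in this era; the column list is an arbitrary example:]

    use sqlparser::ast::Statement;
    use sqlparser::dialect::PostgreSqlDialect;
    use sqlparser::parser::Parser;

    fn if_not_exists_flag() {
        let sql = "CREATE TABLE IF NOT EXISTS uk_cities (name VARCHAR(100))".to_string();
        let stmts = Parser::parse_sql(&PostgreSqlDialect {}, sql).unwrap();
        match &stmts[0] {
            // The three keywords are consumed as a unit by `parse_keywords`,
            // which is why `IF EXISTS` or `IF NOT` alone still fail to parse.
            Statement::CreateTable { if_not_exists, .. } => assert!(*if_not_exists),
            _ => unreachable!(),
        }
    }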
From 40853fe4120dca02f01cdc873b7b10ecd21d8ea6 Mon Sep 17 00:00:00 2001
From: Nickolay Ponomarev
Date: Sun, 10 May 2020 21:06:11 +0300
Subject: [PATCH 010/122] Fix a recently implemented clippy lint

https://rust-lang.github.io/rust-clippy/master/index.html#single_component_path_imports
"Import with single component use path such as `use cratename;` is not
necessary, and thus should be removed."
---
 examples/cli.rs | 2 --
 1 file changed, 2 deletions(-)

diff --git a/examples/cli.rs b/examples/cli.rs
index d0ba9a578..917629e43 100644
--- a/examples/cli.rs
+++ b/examples/cli.rs
@@ -12,8 +12,6 @@

#![warn(clippy::all)]

-use simple_logger;
-
///! A small command-line app to run the parser.
/// Run with `cargo run --example cli`
use std::fs;
From 327e6cd9f1751e65461e867ea5c58f185d1a5f19 Mon Sep 17 00:00:00 2001
From: Nickolay Ponomarev
Date: Sun, 10 May 2020 21:21:01 +0300
Subject: [PATCH 011/122] Report an error for unterminated string literals

...updated the TODOs regarding single-quoted literals parsing while at it.
---
 src/parser.rs             | 10 +++++++++
 src/tokenizer.rs          | 47 ++++++++++++++++++++++++++++++---------
 tests/sqlparser_common.rs |  2 +-
 3 files changed, 48 insertions(+), 11 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index 0939b7e39..7c6a401d5 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1324,6 +1324,16 @@ impl Parser {
            }
            // MSSQL supports single-quoted strings as aliases for columns
            // We accept them as table aliases too, although MSSQL does not.
+           //
+           // Note, that this conflicts with an obscure rule from the SQL
+           // standard, which we don't implement:
+           // https://crate.io/docs/sql-99/en/latest/chapters/07.html#character-string-literal-s
+           // "[Obscure Rule] SQL allows you to break a long <character
+           // string literal> up into two or more smaller <character string
+           // literal>s, split by a <separator> that includes a newline
+           // character. When it sees such a <literal>, your DBMS will
+           // ignore the <separator> and treat the multiple strings as
+           // a single <literal>."
            Some(Token::SingleQuotedString(ref s)) => Ok(Some(Ident::with_quote('\'', s.clone()))),
            not_an_ident => {
                if after_as {

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 96c9535ea..86452a445 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -278,7 +278,7 @@ impl<'a> Tokenizer<'a> {
                match chars.peek() {
                    Some('\'') => {
                        // N'...' - a <national string literal>
-                       let s = self.tokenize_single_quoted_string(chars);
+                       let s = self.tokenize_single_quoted_string(chars)?;
                        Ok(Some(Token::NationalStringLiteral(s)))
                    }
                    _ => {
@@ -295,7 +295,7 @@ impl<'a> Tokenizer<'a> {
                match chars.peek() {
                    Some('\'') => {
                        // X'...' - a <hex string literal>
-                       let s = self.tokenize_single_quoted_string(chars);
+                       let s = self.tokenize_single_quoted_string(chars)?;
                        Ok(Some(Token::HexStringLiteral(s)))
                    }
                    _ => {
@@ -313,7 +313,7 @@ impl<'a> Tokenizer<'a> {
            }
            // string
            '\'' => {
-               let s = self.tokenize_single_quoted_string(chars);
+               let s = self.tokenize_single_quoted_string(chars)?;
                Ok(Some(Token::SingleQuotedString(s)))
            }
            // delimited (quoted) identifier
@@ -431,11 +431,10 @@ impl<'a> Tokenizer<'a> {
    }

    /// Read a single quoted string, starting with the opening quote.
-   fn tokenize_single_quoted_string(&self, chars: &mut Peekable<Chars<'_>>) -> String {
-       //TODO: handle escaped quotes in string
-       //TODO: handle newlines in string
-       //TODO: handle EOF before terminating quote
-       //TODO: handle 'string' 'string continuation'
+   fn tokenize_single_quoted_string(
+       &self,
+       chars: &mut Peekable<Chars<'_>>,
+   ) -> Result<String, TokenizerError> {
        let mut s = String::new();
        chars.next(); // consume the opening quote
        while let Some(&ch) = chars.peek() {

                        s.push('\'');
                        chars.next();
                    } else {
-                       break;
+                       return Ok(s);
                    }
                }
                _ => {

            }
        }
-       s
+       Err(TokenizerError(format!(
+           "Unterminated string literal at Line: {}, Col: {}",
+           self.line, self.col
+       )))
    }

    fn tokenize_multiline_comment(
@@ -640,6 +642,31 @@ mod tests {
        compare(expected, tokens);
    }

+   #[test]
+   fn tokenize_newline_in_string_literal() {
+       let sql = String::from("'foo\r\nbar\nbaz'");
+
+       let dialect = GenericDialect {};
+       let mut tokenizer = Tokenizer::new(&dialect, &sql);
+       let tokens = tokenizer.tokenize().unwrap();
+       let expected = vec![Token::SingleQuotedString("foo\r\nbar\nbaz".to_string())];
+       compare(expected, tokens);
+   }
+
+   #[test]
+   fn tokenize_unterminated_string_literal() {
+       let sql = String::from("select 'foo");
+
+       let dialect = GenericDialect {};
+       let mut tokenizer = Tokenizer::new(&dialect, &sql);
+       assert_eq!(
+           tokenizer.tokenize(),
+           Err(TokenizerError(
+               "Unterminated string literal at Line: 1, Col: 8".to_string()
+           ))
+       );
+   }
+
    #[test]
    fn tokenize_invalid_string_cols() {
        let sql = String::from("\n\nSELECT * FROM table\tمصطفىh");

diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index c62fc86d7..41ceeae54 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -1031,7 +1031,7 @@ fn parse_create_external_table() {
        name VARCHAR(100) NOT NULL,\
        lat DOUBLE NULL,\
        lng DOUBLE)\
-       STORED AS TEXTFILE LOCATION '/tmp/example.csv";
+       STORED AS TEXTFILE LOCATION '/tmp/example.csv'";
    let ast =
one_statement_parses_to( sql, "CREATE EXTERNAL TABLE uk_cities (\ From 7d60bfd8660056b696c387ca97d42797c585edb2 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 10 May 2020 21:43:40 +0300 Subject: [PATCH 012/122] Update CHANGELOG.md --- CHANGELOG.md | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 23b2fdde1..d552e942f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,10 +6,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), Given that the parser produces a typed AST, any changes to the AST will technically be breaking and thus will result in a `0.(N+1)` version. We document changes that break via addition as "Added". ## [Unreleased] -Nothing here yet! Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented changes. +Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented changes. +### Changed +- Change `Ident` (previously a simple `String`) to store the parsed (unquoted) `value` of the identifier and the `quote_style` separately (#143) - thanks @apparebit! +- Support Snowflake's `FROM (table_name)` (#155) - thanks @eyalleshem! + +### Added +- Support MSSQL `TOP () [ PERCENT ] [ WITH TIES ]` (#150) - thanks @alexkyllo! +- Support MySQL `LIMIT row_count OFFSET offset` (not followed by `ROW` or `ROWS`) and remember which variant was parsed (#158) - thanks @mjibson! +- Support PostgreSQL `CREATE TABLE IF NOT EXISTS table_name` (#163) - thanks @alex-dukhno! + +### Fixed +- Report an error for unterminated string literals (#165) -## [0.5.0] - 2019-10-10 +## [0.5.0] - 2019-10-10 ### Changed - Replace the `Value::Long(u64)` and `Value::Double(f64)` variants with `Value::Number(String)` to avoid losing precision when parsing decimal literals (#130) - thanks @benesch! From 8406a938d5a1c61722fe730ba8d7a3190bedd687 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Tue, 26 May 2020 21:33:10 +0300 Subject: [PATCH 013/122] Port the changes made to travis configuration in #159 to GitHub workflows This should fix the build failures due to unavailable components, e.g. error: component 'rustfmt' for target 'x86_64-unknown-linux-gnu' is unavailable for download for channel nightly Sometimes not all components are available in any given nightly. 
---
 .github/workflows/rust.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 0f2507b23..06db11ebf 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -4,14 +4,13 @@ on: [push]

jobs:
  build:
-
    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v1
    - name: Setup Rust
      run: |
-       rustup toolchain install nightly && rustup component add --toolchain nightly rustfmt
+       rustup toolchain install nightly --profile default
        rustup toolchain install stable
        rustup override set stable
      # Clippy must be run first, as its lints are only triggered during

From 5ad578e3e5c7831de3636d130beb3376d7c56f9d Mon Sep 17 00:00:00 2001
From: mashuai
Date: Fri, 15 May 2020 21:55:25 +0800
Subject: [PATCH 014/122] add create index and drop index support

---
 src/ast/mod.rs            | 33 +++++++++++++++++++++++++++++++
 src/dialect/keywords.rs   |  3 ++-
 src/parser.rs             | 27 +++++++++++++++++++++++--
 tests/sqlparser_common.rs | 38 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 98 insertions(+), 3 deletions(-)

diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 98637e697..e7ae0c731 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -476,6 +476,15 @@ pub enum Statement {
        file_format: Option<FileFormat>,
        location: Option<String>,
    },
+   /// CREATE INDEX
+   CreateIndex {
+       /// index name
+       name: ObjectName,
+       table_name: ObjectName,
+       columns: Vec<Ident>,
+       unique: bool,
+       if_not_exists: bool,
+   },
    /// ALTER TABLE
    AlterTable {
        /// Table name
@@ -655,6 +664,28 @@ impl fmt::Display for Statement {
                }
                Ok(())
            }
+           Statement::CreateIndex {
+               name,
+               table_name,
+               columns,
+               unique,
+               if_not_exists,
+           } => {
+               write!(
+                   f,
+                   "CREATE{}INDEX{}{} ON {}({}",
+                   if *unique { " UNIQUE " } else { " " },
+                   if *if_not_exists {
+                       " IF NOT EXISTS "
+                   } else {
+                       " "
+                   },
+                   name,
+                   table_name,
+                   display_separated(columns, ",")
+               )?;
+               write!(f, ");")
+           }
            Statement::AlterTable { name, operation } => {
                write!(f, "ALTER TABLE {} {}", name, operation)
            }
@@ -819,6 +850,7 @@ impl FromStr for FileFormat {
pub enum ObjectType {
    Table,
    View,
+   Index,
}

impl fmt::Display for ObjectType {
@@ -826,6 +858,7 @@ impl fmt::Display for ObjectType {
        f.write_str(match self {
            ObjectType::Table => "TABLE",
            ObjectType::View => "VIEW",
+           ObjectType::Index => "INDEX",
        })
    }
}

diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs
index 9795f2af3..bf33f29c9 100644
--- a/src/dialect/keywords.rs
+++ b/src/dialect/keywords.rs
@@ -420,7 +420,8 @@ define_keywords!(
    WORK,
    YEAR,
    ZONE,
-   END_EXEC = "END-EXEC"
+   END_EXEC = "END-EXEC",
+   INDEX
);

diff --git a/src/parser.rs b/src/parser.rs
index 7c6a401d5..a7cb3ce4a 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -855,13 +855,17 @@ impl Parser {
    pub fn parse_create(&mut self) -> Result<Statement, ParserError> {
        if self.parse_keyword("TABLE") {
            self.parse_create_table()
+       } else if self.parse_keyword("INDEX") {
+           self.parse_create_index(false)
+       } else if self.parse_keywords(vec!["UNIQUE", "INDEX"]) {
+           self.parse_create_index(true)
        } else if self.parse_keyword("MATERIALIZED") || self.parse_keyword("VIEW") {
            self.prev_token();
            self.parse_create_view()
        } else if self.parse_keyword("EXTERNAL") {
            self.parse_create_external_table()
        } else {
-           self.expected("TABLE or VIEW after CREATE", self.peek_token())
+           self.expected("TABLE, VIEW or INDEX after CREATE", self.peek_token())
        }
    }
@@ -912,8 +916,10 @@ impl Parser {
            ObjectType::Table
        } else if self.parse_keyword("VIEW") {
            ObjectType::View
+       } else if self.parse_keyword("INDEX") {
+           ObjectType::Index
        } else {
-           return self.expected("TABLE or VIEW after DROP", self.peek_token());
+           return self.expected("TABLE, VIEW or INDEX after DROP", self.peek_token());
        };
        // Many dialects support the non standard `IF EXISTS` clause and allow
        // specifying multiple objects to delete in a single statement
@@ -932,6 +938,23 @@ impl Parser {
        })
    }

+   pub fn parse_create_index(&mut self, unique: bool) -> Result<Statement, ParserError> {
+       let if_not_exists = self.parse_keywords(vec!["IF", "NOT", "EXISTS"]);
+       let index_name = self.parse_object_name()?;
+       self.expect_keyword("ON")?;
+       let table_name = self.parse_object_name()?;
+       self.expect_token(&Token::LParen)?;
+       let columns = self.parse_comma_separated(Parser::parse_identifier)?;
+       self.expect_token(&Token::RParen)?;
+       Ok(Statement::CreateIndex {
+           name: index_name,
+           table_name,
+           columns,
+           unique,
+           if_not_exists,
+       })
+   }
+
    pub fn parse_create_table(&mut self) -> Result<Statement, ParserError> {
        let if_not_exists = self.parse_keywords(vec!["IF", "NOT", "EXISTS"]);
        let table_name = self.parse_object_name()?;

diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 41ceeae54..147ebcd6a 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -2631,6 +2631,44 @@ fn ensure_multiple_dialects_are_tested() {
    let _ = parse_sql_statements("SELECT @foo");
}

+#[test]
+fn parse_create_index() {
+    let sql = "CREATE UNIQUE INDEX IF NOT EXISTS idx_name ON test(name,age);";
+    let ident_vec = vec![Ident::new("name"), Ident::new("age")];
+    match verified_stmt(sql) {
+        Statement::CreateIndex {
+            name,
+            table_name,
+            columns,
+            unique,
+            if_not_exists,
+        } => {
+            assert_eq!("idx_name", name.to_string());
+            assert_eq!("test", table_name.to_string());
+            assert_eq!(ident_vec, columns);
+            assert_eq!(true, unique);
+            assert_eq!(true, if_not_exists)
+        }
+        _ => unreachable!(),
+    }
+}
+
+#[test]
+fn parse_drop_index() {
+    let sql = "DROP INDEX idx_a";
+    match verified_stmt(sql) {
+        Statement::Drop {
+            names, object_type, ..
+        } => {
+            assert_eq!(
+                vec!["idx_a"],
+                names.iter().map(ToString::to_string).collect::<Vec<_>>()
+            );
+            assert_eq!(ObjectType::Index, object_type);
+        }
+        _ => unreachable!(),
+    }
+}
+
fn parse_sql_statements(sql: &str) -> Result<Vec<Statement>, ParserError> {
    all_dialects().parse_sql_statements(sql)
}
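[An illustrative sketch, not part of the patch, of how the new statement
surfaces in the AST, assuming the same `Parser::parse_sql` entry point as in
the earlier notes:]

    use sqlparser::ast::Statement;
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    fn create_index_fields() {
        let sql = "CREATE UNIQUE INDEX IF NOT EXISTS idx_name ON test(name,age);".to_string();
        let stmts = Parser::parse_sql(&GenericDialect {}, sql).unwrap();
        match &stmts[0] {
            Statement::CreateIndex { unique, if_not_exists, columns, .. } => {
                // Both optional modifiers are captured as plain flags:
                assert!(*unique && *if_not_exists);
                assert_eq!(columns.len(), 2);
            }
            _ => unreachable!(),
        }
    }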
From 320d2f2d0515cc604978736ce00181ec49695eb4 Mon Sep 17 00:00:00 2001
From: Nickolay Ponomarev
Date: Wed, 27 May 2020 05:04:22 +0300
Subject: [PATCH 015/122] Update CHANGELOG.md and fix a last-minute review nit

---
 CHANGELOG.md            | 1 +
 src/dialect/keywords.rs | 4 ++--
 src/parser.rs           | 4 +---
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d552e942f..c65a68615 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@ Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented
- Support Snowflake's `FROM (table_name)` (#155) - thanks @eyalleshem!

### Added
+- Support basic forms of `CREATE INDEX` and `DROP INDEX` (#167) - thanks @mashuai!
- Support MSSQL `TOP (<N>) [ PERCENT ] [ WITH TIES ]` (#150) - thanks @alexkyllo!
- Support MySQL `LIMIT row_count OFFSET offset` (not followed by `ROW` or `ROWS`) and remember which variant was parsed (#158) - thanks @mjibson!
- Support PostgreSQL `CREATE TABLE IF NOT EXISTS table_name` (#163) - thanks @alex-dukhno!
diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs
index bf33f29c9..7ab86de2e 100644
--- a/src/dialect/keywords.rs
+++ b/src/dialect/keywords.rs
@@ -200,6 +200,7 @@ define_keywords!(
    IDENTITY,
    IF,
    IN,
+   INDEX,
    INDICATOR,
    INNER,
    INOUT,
@@ -420,8 +421,7 @@ define_keywords!(
    WORK,
    YEAR,
    ZONE,
-   END_EXEC = "END-EXEC",
-   INDEX
+   END_EXEC = "END-EXEC"
);

diff --git a/src/parser.rs b/src/parser.rs
index a7cb3ce4a..dc8e6b7ea 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -943,9 +943,7 @@ impl Parser {
        let index_name = self.parse_object_name()?;
        self.expect_keyword("ON")?;
        let table_name = self.parse_object_name()?;
-       self.expect_token(&Token::LParen)?;
-       let columns = self.parse_comma_separated(Parser::parse_identifier)?;
-       self.expect_token(&Token::RParen)?;
+       let columns = self.parse_parenthesized_column_list(Mandatory)?;
        Ok(Statement::CreateIndex {
            name: index_name,
            table_name,

From 98f97d09db13501fcbfa852dd5d67417568271f4 Mon Sep 17 00:00:00 2001
From: Christoph Müller
Date: Wed, 27 May 2020 17:24:23 +0200
Subject: [PATCH 016/122] Add support for "on delete cascade" column option (#170)

Specifically, `FOREIGN KEY REFERENCES <foreign_table> (<referred_columns>)`
can now be followed by `ON DELETE <referential_action>` and/or by
`ON UPDATE <referential_action>`.
---
 src/ast/ddl.rs            | 53 +++++++++++++++++++++++++++++++-------
 src/ast/mod.rs            |  3 ++-
 src/dialect/keywords.rs   |  1 +
 src/parser.rs             | 36 +++++++++++++++++++++++++-
 tests/sqlparser_common.rs | 49 ++++++++++++++++++++++++++++++++++--
 5 files changed, 131 insertions(+), 11 deletions(-)

diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs
index 7333ad287..776927669 100644
--- a/src/ast/ddl.rs
+++ b/src/ast/ddl.rs
@@ -155,10 +155,15 @@ pub enum ColumnOption {
        is_primary: bool,
    },
    /// A referential integrity constraint (`[FOREIGN KEY REFERENCES
-   /// <foreign_table> (<referred_columns>)`).
+   /// <foreign_table> (<referred_columns>)
+   /// { [ON DELETE <referential_action>] [ON UPDATE <referential_action>] |
+   ///   [ON UPDATE <referential_action>] [ON DELETE <referential_action>]
+   /// }`).
    ForeignKey {
        foreign_table: ObjectName,
        referred_columns: Vec<Ident>,
+       on_delete: Option<ReferentialAction>,
+       on_update: Option<ReferentialAction>,
    },
    // `CHECK (<expr>)`
    Check(Expr),
@@ -177,12 +182,21 @@ impl fmt::Display for ColumnOption {
            ForeignKey {
                foreign_table,
                referred_columns,
-           } => write!(
-               f,
-               "REFERENCES {} ({})",
-               foreign_table,
-               display_comma_separated(referred_columns)
-           ),
+               on_delete,
+               on_update,
+           } => {
+               write!(f, "REFERENCES {}", foreign_table)?;
+               if !referred_columns.is_empty() {
+                   write!(f, " ({})", display_comma_separated(referred_columns))?;
+               }
+               if let Some(action) = on_delete {
+                   write!(f, " ON DELETE {}", action)?;
+               }
+               if let Some(action) = on_update {
+                   write!(f, " ON UPDATE {}", action)?;
+               }
+               Ok(())
+           }
            Check(expr) => write!(f, "CHECK ({})", expr),
        }
    }
@@ -200,3 +214,28 @@ fn display_constraint_name<'a>(name: &'a Option<Ident>) -> impl fmt::Display + 'a {
    }
    ConstraintName(name)
}

+/// `<referential_action> =
+/// { RESTRICT | CASCADE | SET NULL | NO ACTION | SET DEFAULT }`
+///
+/// Used in foreign key constraints in `ON UPDATE` and `ON DELETE` options.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum ReferentialAction {
+    Restrict,
+    Cascade,
+    SetNull,
+    NoAction,
+    SetDefault,
+}
+
+impl fmt::Display for ReferentialAction {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_str(match self {
+            ReferentialAction::Restrict => "RESTRICT",
+            ReferentialAction::Cascade => "CASCADE",
+            ReferentialAction::SetNull => "SET NULL",
+            ReferentialAction::NoAction => "NO ACTION",
+            ReferentialAction::SetDefault => "SET DEFAULT",
+        })
+    }
+}
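[For illustration only: the `Display` impl above maps each variant back to its
exact keyword spelling, so the round-trip is loss-free. A quick sketch, assumed
to run inside the crate:]

    use sqlparser::ast::ReferentialAction;

    fn referential_action_sql() {
        // Each variant serializes to the keyword sequence the parser consumed:
        assert_eq!(ReferentialAction::SetNull.to_string(), "SET NULL");
        assert_eq!(ReferentialAction::NoAction.to_string(), "NO ACTION");
        assert_eq!(ReferentialAction::Cascade.to_string(), "CASCADE");
    }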
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index e7ae0c731..bc5c5dafc 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -22,7 +22,8 @@ use std::fmt;

pub use self::data_type::DataType;
pub use self::ddl::{
-   AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef, TableConstraint,
+   AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef, ReferentialAction,
+   TableConstraint,
};
pub use self::operator::{BinaryOperator, UnaryOperator};

diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs
index 7ab86de2e..4f39904b6 100644
--- a/src/dialect/keywords.rs
+++ b/src/dialect/keywords.rs
@@ -51,6 +51,7 @@ macro_rules! define_keywords {

define_keywords!(
    ABS,
+   ACTION,
    ADD,
    ASC,
    ALL,

diff --git a/src/parser.rs b/src/parser.rs
index dc8e6b7ea..5741167de 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1038,10 +1038,25 @@ impl Parser {
            ColumnOption::Unique { is_primary: false }
        } else if self.parse_keyword("REFERENCES") {
            let foreign_table = self.parse_object_name()?;
-           let referred_columns = self.parse_parenthesized_column_list(Mandatory)?;
+           // PostgreSQL allows omitting the column list and
+           // uses the primary key column of the foreign table by default
+           let referred_columns = self.parse_parenthesized_column_list(Optional)?;
+           let mut on_delete = None;
+           let mut on_update = None;
+           loop {
+               if on_delete.is_none() && self.parse_keywords(vec!["ON", "DELETE"]) {
+                   on_delete = Some(self.parse_referential_action()?);
+               } else if on_update.is_none() && self.parse_keywords(vec!["ON", "UPDATE"]) {
+                   on_update = Some(self.parse_referential_action()?);
+               } else {
+                   break;
+               }
+           }
            ColumnOption::ForeignKey {
                foreign_table,
                referred_columns,
+               on_delete,
+               on_update,
            }
        } else if self.parse_keyword("CHECK") {
            self.expect_token(&Token::LParen)?;
@@ -1055,6 +1070,25 @@ impl Parser {
        Ok(ColumnOptionDef { name, option })
    }

+   pub fn parse_referential_action(&mut self) -> Result<ReferentialAction, ParserError> {
+       if self.parse_keyword("RESTRICT") {
+           Ok(ReferentialAction::Restrict)
+       } else if self.parse_keyword("CASCADE") {
+           Ok(ReferentialAction::Cascade)
+       } else if self.parse_keywords(vec!["SET", "NULL"]) {
+           Ok(ReferentialAction::SetNull)
+       } else if self.parse_keywords(vec!["NO", "ACTION"]) {
+           Ok(ReferentialAction::NoAction)
+       } else if self.parse_keywords(vec!["SET", "DEFAULT"]) {
+           Ok(ReferentialAction::SetDefault)
+       } else {
+           self.expected(
+               "one of RESTRICT, CASCADE, SET NULL, NO ACTION or SET DEFAULT",
+               self.peek_token(),
+           )
+       }
+   }
+
    pub fn parse_optional_table_constraint(
        &mut self,
    ) -> Result<Option<TableConstraint>, ParserError> {

diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 147ebcd6a..47d7db053 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -893,7 +893,9 @@ fn parse_create_table() {
        lat DOUBLE NULL,\
        lng DOUBLE,
        constrained INT NULL CONSTRAINT pkey PRIMARY KEY NOT NULL UNIQUE CHECK (constrained > 0),
-       ref INT REFERENCES othertable (a, b))";
+       ref INT REFERENCES othertable (a, b),\
+       ref2 INT references othertable2 on delete cascade on update no
action\ + )"; let ast = one_statement_parses_to( sql, "CREATE TABLE uk_cities (\ @@ -901,7 +903,8 @@ fn parse_create_table() { lat double NULL, \ lng double, \ constrained int NULL CONSTRAINT pkey PRIMARY KEY NOT NULL UNIQUE CHECK (constrained > 0), \ - ref int REFERENCES othertable (a, b))", + ref int REFERENCES othertable (a, b), \ + ref2 int REFERENCES othertable2 ON DELETE CASCADE ON UPDATE NO ACTION)", ); match ast { Statement::CreateTable { @@ -978,8 +981,24 @@ fn parse_create_table() { option: ColumnOption::ForeignKey { foreign_table: ObjectName(vec!["othertable".into()]), referred_columns: vec!["a".into(), "b".into(),], + on_delete: None, + on_update: None, } }] + }, + ColumnDef { + name: "ref2".into(), + data_type: DataType::Int, + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::ForeignKey { + foreign_table: ObjectName(vec!["othertable2".into()]), + referred_columns: vec![], + on_delete: Some(ReferentialAction::Cascade), + on_update: Some(ReferentialAction::NoAction), + } + },] } ] ); @@ -996,6 +1015,32 @@ fn parse_create_table() { .contains("Expected column option, found: GARBAGE")); } +#[test] +fn parse_create_table_with_multiple_on_delete_fails() { + parse_sql_statements( + "\ + create table X (\ + y_id int references Y (id) \ + on delete cascade on update cascade on delete no action\ + )", + ) + .expect_err("should have failed"); +} + +#[test] +fn parse_create_table_with_on_delete_on_update_2in_any_order() -> Result<(), ParserError> { + let sql = |options: &str| -> String { + format!("create table X (y_id int references Y (id) {})", options) + }; + + parse_sql_statements(&sql("on update cascade on delete no action"))?; + parse_sql_statements(&sql("on delete cascade on update cascade"))?; + parse_sql_statements(&sql("on update no action"))?; + parse_sql_statements(&sql("on delete restrict"))?; + + Ok(()) +} + #[test] fn parse_create_table_with_options() { let sql = "CREATE TABLE t (c int) WITH (foo = 'bar', a = 123)"; From 8d5eaf95b30c9a6ac76970ba37aa2c207daf0ca5 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Wed, 27 May 2020 19:41:31 +0300 Subject: [PATCH 017/122] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c65a68615..81705b536 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented ### Added - Support basic forms of `CREATE INDEX` and `DROP INDEX` (#167) - thanks @mashuai! +- Support `ON { UPDATE | DELETE } { RESTRICT | CASCADE | SET NULL | NO ACTION | SET DEFAULT }` in `FOREIGN KEY` constraints (#170) - thanks @c7hm4r! - Support MSSQL `TOP () [ PERCENT ] [ WITH TIES ]` (#150) - thanks @alexkyllo! - Support MySQL `LIMIT row_count OFFSET offset` (not followed by `ROW` or `ROWS`) and remember which variant was parsed (#158) - thanks @mjibson! - Support PostgreSQL `CREATE TABLE IF NOT EXISTS table_name` (#163) - thanks @alex-dukhno! From a2f4996bdd0ee67a00ba9a39046eb378ccc58b9a Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Tue, 26 May 2020 21:09:58 +0300 Subject: [PATCH 018/122] Update README to point to SQL:2016, instead of 2011 This was discussed in #125, but we forgot to update the README at the time. --- README.md | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 7677f7983..1ff329c26 100644 --- a/README.md +++ b/README.md @@ -47,10 +47,8 @@ published regularly since. 
Most revisions have added significant new features to the language, and as a result no database claims to support the full breadth of features. This parser currently supports most of the SQL-92 syntax, plus some syntax from newer versions that have been explicitly requested, plus some MSSQL- -and PostgreSQL-specific syntax. Whenever possible, the [online SQL:2011 -grammar][sql-2011-grammar] is used to guide what syntax to accept. (We will -happily accept changes that conform to the SQL:2016 syntax as well, but that -edition's grammar is not yet available online.) +and PostgreSQL-specific syntax. Whenever possible, the [online SQL:2016 +grammar][sql-2016-grammar] is used to guide what syntax to accept. Unfortunately, stating anything more specific about compliance is difficult. There is no publicly available test suite that can assess compliance @@ -105,8 +103,8 @@ If you are unable to submit a patch, feel free to file an issue instead. Please try to include: * some representative examples of the syntax you wish to support or fix; - * the relevant bits of the [SQL grammar][sql-2011-grammar], if the syntax is - part of SQL:2011; and + * the relevant bits of the [SQL grammar][sql-2016-grammar], if the syntax is + part of SQL:2016; and * links to documentation for the feature for a few of the most popular databases that support it. @@ -123,5 +121,5 @@ resources. [DataFusion]: https://github.com/apache/arrow/tree/master/rust/datafusion [LocustDB]: https://github.com/cswinter/LocustDB [Pratt Parser]: https://tdop.github.io/ -[sql-2011-grammar]: https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html +[sql-2016-grammar]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html [sql-standard]: https://en.wikipedia.org/wiki/ISO/IEC_9075 \ No newline at end of file From 1cf9e5ecefc11bfc92dd66060f075c1270b21d30 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Tue, 26 May 2020 21:14:13 +0300 Subject: [PATCH 019/122] Update README: we support bits and pieces from other dialects too --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 1ff329c26..3b277a96b 100644 --- a/README.md +++ b/README.md @@ -46,9 +46,9 @@ SQL was first standardized in 1987, and revisions of the standard have been published regularly since. Most revisions have added significant new features to the language, and as a result no database claims to support the full breadth of features. This parser currently supports most of the SQL-92 syntax, plus some -syntax from newer versions that have been explicitly requested, plus some MSSQL- -and PostgreSQL-specific syntax. Whenever possible, the [online SQL:2016 -grammar][sql-2016-grammar] is used to guide what syntax to accept. +syntax from newer versions that have been explicitly requested, plus some MSSQL, +PostgreSQL, and other dialect-specific syntax. Whenever possible, the [online +SQL:2016 grammar][sql-2016-grammar] is used to guide what syntax to accept. Unfortunately, stating anything more specific about compliance is difficult. 
There is no publicly available test suite that can assess compliance From 91f769e46028c22e3d2f9dc17022d341df759b01 Mon Sep 17 00:00:00 2001 From: Alex Dukhno Date: Thu, 28 May 2020 19:50:16 +0300 Subject: [PATCH 020/122] added create and drop schema --- src/ast/mod.rs | 5 +++++ src/dialect/keywords.rs | 1 + src/parser.rs | 16 ++++++++++++++-- tests/sqlparser_common.rs | 22 ++++++++++++++++++++++ 4 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index bc5c5dafc..a867abcf9 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -535,6 +535,8 @@ pub enum Statement { Commit { chain: bool }, /// `ROLLBACK [ TRANSACTION | WORK ] [ AND [ NO ] CHAIN ]` Rollback { chain: bool }, + /// CREATE SCHEMA + CreateSchema { schema_name: ObjectName }, } impl fmt::Display for Statement { @@ -754,6 +756,7 @@ impl fmt::Display for Statement { Statement::Rollback { chain } => { write!(f, "ROLLBACK{}", if *chain { " AND CHAIN" } else { "" },) } + Statement::CreateSchema { schema_name } => write!(f, "CREATE SCHEMA {}", schema_name), } } } @@ -852,6 +855,7 @@ pub enum ObjectType { Table, View, Index, + Schema, } impl fmt::Display for ObjectType { @@ -860,6 +864,7 @@ impl fmt::Display for ObjectType { ObjectType::Table => "TABLE", ObjectType::View => "VIEW", ObjectType::Index => "INDEX", + ObjectType::Schema => "SCHEMA", }) } } diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 4f39904b6..b8f8817f5 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -330,6 +330,7 @@ define_keywords!( ROW_NUMBER, ROWS, SAVEPOINT, + SCHEMA, SCOPE, SCROLL, SEARCH, diff --git a/src/parser.rs b/src/parser.rs index 5741167de..d235eb169 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -864,11 +864,21 @@ impl Parser { self.parse_create_view() } else if self.parse_keyword("EXTERNAL") { self.parse_create_external_table() + } else if self.parse_keyword("SCHEMA") { + self.parse_create_schema() } else { - self.expected("TABLE, VIEW or INDEX after CREATE", self.peek_token()) + self.expected( + "TABLE, VIEW, INDEX or SCHEMA after CREATE", + self.peek_token(), + ) } } + pub fn parse_create_schema(&mut self) -> Result { + let schema_name = self.parse_object_name()?; + Ok(Statement::CreateSchema { schema_name }) + } + pub fn parse_create_external_table(&mut self) -> Result { self.expect_keyword("TABLE")?; let table_name = self.parse_object_name()?; @@ -918,8 +928,10 @@ impl Parser { ObjectType::View } else if self.parse_keyword("INDEX") { ObjectType::Index + } else if self.parse_keyword("SCHEMA") { + ObjectType::Schema } else { - return self.expected("TABLE, VIEW or INDEX after DROP", self.peek_token()); + return self.expected("TABLE, VIEW, INDEX or SCHEMA after DROP", self.peek_token()); }; // Many dialects support the non standard `IF EXISTS` clause and allow // specifying multiple objects to delete in a single statement diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 47d7db053..7272e35ac 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1027,6 +1027,28 @@ fn parse_create_table_with_multiple_on_delete_fails() { .expect_err("should have failed"); } +#[test] +fn parse_create_schema() { + let sql = "CREATE SCHEMA X"; + + match verified_stmt(sql) { + Statement::CreateSchema { schema_name } => { + assert_eq!(schema_name.to_string(), "X".to_owned()) + } + _ => unreachable!(), + } +} + +#[test] +fn parse_drop_schema() { + let sql = "DROP SCHEMA X"; + + match verified_stmt(sql) { + Statement::Drop { object_type, .. 
} => assert_eq!(object_type, ObjectType::Schema), + _ => unreachable!(), + } +} + #[test] fn parse_create_table_with_on_delete_on_update_2in_any_order() -> Result<(), ParserError> { let sql = |options: &str| -> String { From 418b9631ce9c24cf9bb26cf7dd9e42edd29de985 Mon Sep 17 00:00:00 2001 From: QP Hou Date: Sat, 30 May 2020 07:05:15 -0700 Subject: [PATCH 021/122] add nulls first/last support to order by expression (#176) Following `` from the standard https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#_10_10_sort_specification_list --- CHANGELOG.md | 6 ++++-- src/ast/query.rs | 18 ++++++++++++++---- src/dialect/keywords.rs | 2 ++ src/parser.rs | 15 ++++++++++++++- tests/sqlparser_common.rs | 31 ++++++++++++++++++++++++++++++- 5 files changed, 64 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 81705b536..d39c76cdb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,11 +13,13 @@ Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented - Support Snowflake's `FROM (table_name)` (#155) - thanks @eyalleshem! ### Added -- Support basic forms of `CREATE INDEX` and `DROP INDEX` (#167) - thanks @mashuai! -- Support `ON { UPDATE | DELETE } { RESTRICT | CASCADE | SET NULL | NO ACTION | SET DEFAULT }` in `FOREIGN KEY` constraints (#170) - thanks @c7hm4r! - Support MSSQL `TOP () [ PERCENT ] [ WITH TIES ]` (#150) - thanks @alexkyllo! - Support MySQL `LIMIT row_count OFFSET offset` (not followed by `ROW` or `ROWS`) and remember which variant was parsed (#158) - thanks @mjibson! - Support PostgreSQL `CREATE TABLE IF NOT EXISTS table_name` (#163) - thanks @alex-dukhno! +- Support basic forms of `CREATE INDEX` and `DROP INDEX` (#167) - thanks @mashuai! +- Support `ON { UPDATE | DELETE } { RESTRICT | CASCADE | SET NULL | NO ACTION | SET DEFAULT }` in `FOREIGN KEY` constraints (#170) - thanks @c7hm4r! +- Support basic forms of `CREATE SCHEMA` and `DROP SCHEMA` (#173) - thanks @alex-dukhno! +- Support `NULLS FIRST`/`LAST` in `ORDER BY` expressions (#176) - thanks @houqp! 
### Fixed - Report an error for unterminated string literals (#165) diff --git a/src/ast/query.rs b/src/ast/query.rs index a26ba2655..a5918f1a3 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -374,20 +374,30 @@ pub enum JoinConstraint { Natural, } -/// SQL ORDER BY expression +/// An `ORDER BY` expression #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct OrderByExpr { pub expr: Expr, + /// Optional `ASC` or `DESC` pub asc: Option, + /// Optional `NULLS FIRST` or `NULLS LAST` + pub nulls_first: Option, } impl fmt::Display for OrderByExpr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.expr)?; match self.asc { - Some(true) => write!(f, "{} ASC", self.expr), - Some(false) => write!(f, "{} DESC", self.expr), - None => write!(f, "{}", self.expr), + Some(true) => write!(f, " ASC")?, + Some(false) => write!(f, " DESC")?, + None => (), } + match self.nulls_first { + Some(true) => write!(f, " NULLS FIRST")?, + Some(false) => write!(f, " NULLS LAST")?, + None => (), + } + Ok(()) } } diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index b8f8817f5..a01871c6e 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -220,6 +220,7 @@ define_keywords!( LAG, LANGUAGE, LARGE, + LAST, LAST_VALUE, LATERAL, LEAD, @@ -262,6 +263,7 @@ define_keywords!( NTILE, NULL, NULLIF, + NULLS, NUMERIC, OBJECT, OCTET_LENGTH, diff --git a/src/parser.rs b/src/parser.rs index d235eb169..608ac4736 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2015,7 +2015,20 @@ impl Parser { } else { None }; - Ok(OrderByExpr { expr, asc }) + + let nulls_first = if self.parse_keywords(vec!["NULLS", "FIRST"]) { + Some(true) + } else if self.parse_keywords(vec!["NULLS", "LAST"]) { + Some(false) + } else { + None + }; + + Ok(OrderByExpr { + expr, + asc, + nulls_first, + }) } /// Parse a TOP clause, MSSQL equivalent of LIMIT, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 7272e35ac..c87fcf3a2 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -746,14 +746,17 @@ fn parse_select_order_by() { OrderByExpr { expr: Expr::Identifier(Ident::new("lname")), asc: Some(true), + nulls_first: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("fname")), asc: Some(false), + nulls_first: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("id")), asc: None, + nulls_first: None, }, ], select.order_by @@ -775,10 +778,35 @@ fn parse_select_order_by_limit() { OrderByExpr { expr: Expr::Identifier(Ident::new("lname")), asc: Some(true), + nulls_first: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("fname")), asc: Some(false), + nulls_first: None, + }, + ], + select.order_by + ); + assert_eq!(Some(Expr::Value(number("2"))), select.limit); +} + +#[test] +fn parse_select_order_by_nulls_order() { + let sql = "SELECT id, fname, lname FROM customer WHERE id < 5 \ + ORDER BY lname ASC NULLS FIRST, fname DESC NULLS LAST LIMIT 2"; + let select = verified_query(sql); + assert_eq!( + vec![ + OrderByExpr { + expr: Expr::Identifier(Ident::new("lname")), + asc: Some(true), + nulls_first: Some(true), + }, + OrderByExpr { + expr: Expr::Identifier(Ident::new("fname")), + asc: Some(false), + nulls_first: Some(false), }, ], select.order_by @@ -1251,7 +1279,8 @@ fn parse_window_functions() { partition_by: vec![], order_by: vec![OrderByExpr { expr: Expr::Identifier(Ident::new("dt")), - asc: Some(false) + asc: Some(false), + nulls_first: None, }], window_frame: None, }), From 5f3c1bda0133348a0b60f18c88a0d48b9400dcfe Mon Sep 17 00:00:00 2001 From: 
Max Countryman Date: Sat, 30 May 2020 08:50:17 -0700 Subject: [PATCH 022/122] Provide LISTAGG implementation (#174) This patch provides an initial implemenation of LISTAGG[1]. Notably this implemenation deviates from ANSI SQL by allowing both WITHIN GROUP and the delimiter to be optional. We do so because Redshift SQL works this way and this approach is ultimately more flexible. Fixes #169. [1] https://modern-sql.com/feature/listagg --- CHANGELOG.md | 1 + src/ast/mod.rs | 74 ++++++++++++++++++++++++++++++++ src/dialect/keywords.rs | 3 ++ src/parser.rs | 88 +++++++++++++++++++++++++++++++++------ tests/sqlparser_common.rs | 58 ++++++++++++++++++++++++-- 5 files changed, 207 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d39c76cdb..60c25da18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented - Support `ON { UPDATE | DELETE } { RESTRICT | CASCADE | SET NULL | NO ACTION | SET DEFAULT }` in `FOREIGN KEY` constraints (#170) - thanks @c7hm4r! - Support basic forms of `CREATE SCHEMA` and `DROP SCHEMA` (#173) - thanks @alex-dukhno! - Support `NULLS FIRST`/`LAST` in `ORDER BY` expressions (#176) - thanks @houqp! +- Support `LISTAGG()` (#174) - thanks @maxcountryman! ### Fixed - Report an error for unterminated string literals (#165) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a867abcf9..2dbf42b29 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -224,6 +224,8 @@ pub enum Expr { /// A parenthesized subquery `(SELECT ...)`, used in expression like /// `SELECT (subquery) AS x` or `WHERE (subquery) = x` Subquery(Box), + /// The `LISTAGG` function `SELECT LISTAGG(...) WITHIN GROUP (ORDER BY ...)` + ListAgg(ListAgg), } impl fmt::Display for Expr { @@ -299,6 +301,7 @@ impl fmt::Display for Expr { } Expr::Exists(s) => write!(f, "EXISTS ({})", s), Expr::Subquery(s) => write!(f, "({})", s), + Expr::ListAgg(listagg) => write!(f, "{}", listagg), } } } @@ -850,6 +853,77 @@ impl FromStr for FileFormat { } } +/// A `LISTAGG` invocation `LISTAGG( [ DISTINCT ] [, ] [ON OVERFLOW ] ) ) +/// [ WITHIN GROUP (ORDER BY [, ...] 
) ]` +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ListAgg { + pub distinct: bool, + pub expr: Box, + pub separator: Option>, + pub on_overflow: Option, + pub within_group: Vec, +} + +impl fmt::Display for ListAgg { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "LISTAGG({}{}", + if self.distinct { "DISTINCT " } else { "" }, + self.expr + )?; + if let Some(separator) = &self.separator { + write!(f, ", {}", separator)?; + } + if let Some(on_overflow) = &self.on_overflow { + write!(f, "{}", on_overflow)?; + } + write!(f, ")")?; + if !self.within_group.is_empty() { + write!( + f, + " WITHIN GROUP (ORDER BY {})", + display_comma_separated(&self.within_group) + )?; + } + Ok(()) + } +} + +/// The `ON OVERFLOW` clause of a LISTAGG invocation +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum ListAggOnOverflow { + /// `ON OVERFLOW ERROR` + Error, + + /// `ON OVERFLOW TRUNCATE [ ] WITH[OUT] COUNT` + Truncate { + filler: Option>, + with_count: bool, + }, +} + +impl fmt::Display for ListAggOnOverflow { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, " ON OVERFLOW")?; + match self { + ListAggOnOverflow::Error => write!(f, " ERROR"), + ListAggOnOverflow::Truncate { filler, with_count } => { + write!(f, " TRUNCATE")?; + if let Some(filler) = filler { + write!(f, " {}", filler)?; + } + if *with_count { + write!(f, " WITH")?; + } else { + write!(f, " WITHOUT")?; + } + write!(f, " COUNT") + } + } + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum ObjectType { Table, diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index a01871c6e..ee59a1c92 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -161,6 +161,7 @@ define_keywords!( END_FRAME, END_PARTITION, EQUALS, + ERROR, ESCAPE, EVERY, EXCEPT, @@ -230,6 +231,7 @@ define_keywords!( LIKE, LIKE_REGEX, LIMIT, + LISTAGG, LN, LOCAL, LOCALTIME, @@ -279,6 +281,7 @@ define_keywords!( OUT, OUTER, OVER, + OVERFLOW, OVERLAPS, OVERLAY, PARAMETER, diff --git a/src/parser.rs b/src/parser.rs index 608ac4736..c0345736f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -191,6 +191,7 @@ impl Parser { "EXISTS" => self.parse_exists_expr(), "EXTRACT" => self.parse_extract_expr(), "INTERVAL" => self.parse_literal_interval(), + "LISTAGG" => self.parse_listagg_expr(), "NOT" => Ok(Expr::UnaryOp { op: UnaryOperator::Not, expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?), @@ -272,14 +273,7 @@ impl Parser { pub fn parse_function(&mut self, name: ObjectName) -> Result { self.expect_token(&Token::LParen)?; - let all = self.parse_keyword("ALL"); - let distinct = self.parse_keyword("DISTINCT"); - if all && distinct { - return parser_err!(format!( - "Cannot specify both ALL and DISTINCT in function: {}", - name.to_string(), - )); - } + let distinct = self.parse_all_or_distinct()?; let args = self.parse_optional_args()?; let over = if self.parse_keyword("OVER") { // TBD: support window names (`OVER mywin`) in place of inline specification @@ -423,6 +417,66 @@ impl Parser { }) } + /// Parse a SQL LISTAGG expression, e.g. `LISTAGG(...) WITHIN GROUP (ORDER BY ...)`. + pub fn parse_listagg_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let distinct = self.parse_all_or_distinct()?; + let expr = Box::new(self.parse_expr()?); + // While ANSI SQL would would require the separator, Redshift makes this optional. Here we + // choose to make the separator optional as this provides the more general implementation. 
+ let separator = if self.consume_token(&Token::Comma) { + Some(Box::new(self.parse_expr()?)) + } else { + None + }; + let on_overflow = if self.parse_keywords(vec!["ON", "OVERFLOW"]) { + if self.parse_keyword("ERROR") { + Some(ListAggOnOverflow::Error) + } else { + self.expect_keyword("TRUNCATE")?; + let filler = match self.peek_token() { + Some(Token::Word(kw)) if kw.keyword == "WITH" || kw.keyword == "WITHOUT" => { + None + } + Some(Token::SingleQuotedString(_)) + | Some(Token::NationalStringLiteral(_)) + | Some(Token::HexStringLiteral(_)) => Some(Box::new(self.parse_expr()?)), + _ => self.expected( + "either filler, WITH, or WITHOUT in LISTAGG", + self.peek_token(), + )?, + }; + let with_count = self.parse_keyword("WITH"); + if !with_count && !self.parse_keyword("WITHOUT") { + self.expected("either WITH or WITHOUT in LISTAGG", self.peek_token())?; + } + self.expect_keyword("COUNT")?; + Some(ListAggOnOverflow::Truncate { filler, with_count }) + } + } else { + None + }; + self.expect_token(&Token::RParen)?; + // Once again ANSI SQL requires WITHIN GROUP, but Redshift does not. Again we choose the + // more general implementation. + let within_group = if self.parse_keywords(vec!["WITHIN", "GROUP"]) { + self.expect_token(&Token::LParen)?; + self.expect_keywords(&["ORDER", "BY"])?; + let order_by_expr = self.parse_comma_separated(Parser::parse_order_by_expr)?; + self.expect_token(&Token::RParen)?; + order_by_expr + } else { + vec![] + }; + Ok(Expr::ListAgg(ListAgg { + distinct, + expr, + separator, + on_overflow, + within_group, + })) + } + // This function parses date/time fields for both the EXTRACT function-like // operator and interval qualifiers. EXTRACT supports a wider set of // date/time fields than interval qualifiers, so this function may need to @@ -851,6 +905,18 @@ impl Parser { Ok(values) } + /// Parse either `ALL` or `DISTINCT`. Returns `true` if `DISTINCT` is parsed and results in a + /// `ParserError` if both `ALL` and `DISTINCT` are fround. + pub fn parse_all_or_distinct(&mut self) -> Result { + let all = self.parse_keyword("ALL"); + let distinct = self.parse_keyword("DISTINCT"); + if all && distinct { + return parser_err!("Cannot specify both ALL and DISTINCT".to_string()); + } else { + Ok(distinct) + } + } + /// Parse a SQL CREATE statement pub fn parse_create(&mut self) -> Result { if self.parse_keyword("TABLE") { @@ -1635,11 +1701,7 @@ impl Parser { /// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`), /// assuming the initial `SELECT` was already consumed pub fn parse_select(&mut self) -> Result { - let all = self.parse_keyword("ALL"); - let distinct = self.parse_keyword("DISTINCT"); - if all && distinct { - return parser_err!("Cannot specify both ALL and DISTINCT in SELECT"); - } + let distinct = self.parse_all_or_distinct()?; let top = if self.parse_keyword("TOP") { Some(self.parse_top()?) 
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index c87fcf3a2..257b48230 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -244,7 +244,7 @@ fn parse_select_all() { fn parse_select_all_distinct() { let result = parse_sql_statements("SELECT ALL DISTINCT name FROM customer"); assert_eq!( - ParserError::ParserError("Cannot specify both ALL and DISTINCT in SELECT".to_string()), + ParserError::ParserError("Cannot specify both ALL and DISTINCT".to_string()), result.unwrap_err(), ); } @@ -357,9 +357,7 @@ fn parse_select_count_distinct() { let sql = "SELECT COUNT(ALL DISTINCT + x) FROM customer"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError( - "Cannot specify both ALL and DISTINCT in function: COUNT".to_string() - ), + ParserError::ParserError("Cannot specify both ALL and DISTINCT".to_string()), res.unwrap_err() ); } @@ -914,6 +912,58 @@ fn parse_extract() { ); } +#[test] +fn parse_listagg() { + let sql = "SELECT LISTAGG(DISTINCT dateid, ', ' ON OVERFLOW TRUNCATE '%' WITHOUT COUNT) \ + WITHIN GROUP (ORDER BY id, username)"; + let select = verified_only_select(sql); + + verified_stmt("SELECT LISTAGG(sellerid) WITHIN GROUP (ORDER BY dateid)"); + verified_stmt("SELECT LISTAGG(dateid)"); + verified_stmt("SELECT LISTAGG(DISTINCT dateid)"); + verified_stmt("SELECT LISTAGG(dateid ON OVERFLOW ERROR)"); + verified_stmt("SELECT LISTAGG(dateid ON OVERFLOW TRUNCATE N'...' WITH COUNT)"); + verified_stmt("SELECT LISTAGG(dateid ON OVERFLOW TRUNCATE X'deadbeef' WITH COUNT)"); + + let expr = Box::new(Expr::Identifier(Ident::new("dateid"))); + let on_overflow = Some(ListAggOnOverflow::Truncate { + filler: Some(Box::new(Expr::Value(Value::SingleQuotedString( + "%".to_string(), + )))), + with_count: false, + }); + let within_group = vec![ + OrderByExpr { + expr: Expr::Identifier(Ident { + value: "id".to_string(), + quote_style: None, + }), + asc: None, + nulls_first: None, + }, + OrderByExpr { + expr: Expr::Identifier(Ident { + value: "username".to_string(), + quote_style: None, + }), + asc: None, + nulls_first: None, + }, + ]; + assert_eq!( + &Expr::ListAgg(ListAgg { + distinct: true, + expr, + separator: Some(Box::new(Expr::Value(Value::SingleQuotedString( + ", ".to_string() + )))), + on_overflow, + within_group + }), + expr_from_projection(only(&select.projection)) + ); +} + #[test] fn parse_create_table() { let sql = "CREATE TABLE uk_cities (\ From 00dc490f72aed72a4c92d367280197a1f0a98a9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Tue, 2 Jun 2020 20:24:30 +0200 Subject: [PATCH 023/122] Support the string concat operator (#178) The selected precedence is based on BigQuery documentation, where it is equal to `*` and `/`: https://cloud.google.com/bigquery/docs/reference/standard-sql/operators --- src/ast/operator.rs | 2 ++ src/parser.rs | 3 ++- src/tokenizer.rs | 33 +++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 15 +++++++++++++++ 4 files changed, 52 insertions(+), 1 deletion(-) diff --git a/src/ast/operator.rs b/src/ast/operator.rs index f2970482c..2d75c46fa 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -38,6 +38,7 @@ pub enum BinaryOperator { Multiply, Divide, Modulus, + StringConcat, Gt, Lt, GtEq, @@ -58,6 +59,7 @@ impl fmt::Display for BinaryOperator { BinaryOperator::Multiply => "*", BinaryOperator::Divide => "/", BinaryOperator::Modulus => "%", + BinaryOperator::StringConcat => "||", BinaryOperator::Gt => ">", BinaryOperator::Lt => "<", BinaryOperator::GtEq => ">=", diff --git 
a/src/parser.rs b/src/parser.rs index c0345736f..00dd24948 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -577,6 +577,7 @@ impl Parser { Token::Minus => Some(BinaryOperator::Minus), Token::Mult => Some(BinaryOperator::Multiply), Token::Mod => Some(BinaryOperator::Modulus), + Token::StringConcat => Some(BinaryOperator::StringConcat), Token::Div => Some(BinaryOperator::Divide), Token::Word(ref k) => match k.keyword.as_ref() { "AND" => Some(BinaryOperator::And), @@ -708,7 +709,7 @@ impl Parser { Ok(20) } Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC), - Token::Mult | Token::Div | Token::Mod => Ok(40), + Token::Mult | Token::Div | Token::Mod | Token::StringConcat => Ok(40), Token::DoubleColon => Ok(50), _ => Ok(0), } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 86452a445..f3504ffb6 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -64,6 +64,8 @@ pub enum Token { Div, /// Modulo Operator `%` Mod, + /// String concatenation `||` + StringConcat, /// Left parenthesis `(` LParen, /// Right parenthesis `)` @@ -111,6 +113,7 @@ impl fmt::Display for Token { Token::Minus => f.write_str("-"), Token::Mult => f.write_str("*"), Token::Div => f.write_str("/"), + Token::StringConcat => f.write_str("||"), Token::Mod => f.write_str("%"), Token::LParen => f.write_str("("), Token::RParen => f.write_str(")"), @@ -374,6 +377,16 @@ impl<'a> Tokenizer<'a> { '+' => self.consume_and_return(chars, Token::Plus), '*' => self.consume_and_return(chars, Token::Mult), '%' => self.consume_and_return(chars, Token::Mod), + '|' => { + chars.next(); // consume the '|' + match chars.peek() { + Some('|') => self.consume_and_return(chars, Token::StringConcat), + _ => Err(TokenizerError(format!( + "Expecting to see `||`. Bitwise or operator `|` is not supported. \nError at Line: {}, Col: {}", + self.line, self.col + ))), + } + } '=' => self.consume_and_return(chars, Token::Eq), '.' => self.consume_and_return(chars, Token::Period), '!' 
=> { @@ -562,6 +575,26 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_string_string_concat() { + let sql = String::from("SELECT 'a' || 'b'"); + let dialect = GenericDialect {}; + let mut tokenizer = Tokenizer::new(&dialect, &sql); + let tokens = tokenizer.tokenize().unwrap(); + + let expected = vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::SingleQuotedString(String::from("a")), + Token::Whitespace(Whitespace::Space), + Token::StringConcat, + Token::Whitespace(Whitespace::Space), + Token::SingleQuotedString(String::from("b")), + ]; + + compare(expected, tokens); + } + #[test] fn tokenize_simple_select() { let sql = String::from("SELECT * FROM customer WHERE id = 1 LIMIT 5"); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 257b48230..34f8c589a 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -665,6 +665,21 @@ fn parse_in_subquery() { ); } +#[test] +fn parse_string_agg() { + let sql = "SELECT a || b"; + + let select = verified_only_select(sql); + assert_eq!( + SelectItem::UnnamedExpr(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("a"))), + op: BinaryOperator::StringConcat, + right: Box::new(Expr::Identifier(Ident::new("b"))), + }), + select.projection[0] + ); +} + #[test] fn parse_between() { fn chk(negated: bool) { From b4699bd4a737a0b5daac3b8fc08169d84ea68d99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Wed, 3 Jun 2020 18:02:05 +0200 Subject: [PATCH 024/122] Support bitwise and, or, xor (#181) Operator precedence is coming from: https://cloud.google.com/bigquery/docs/reference/standard-sql/operators --- src/ast/operator.rs | 6 ++++++ src/parser.rs | 6 ++++++ src/tokenizer.rs | 38 +++++++++++++++++++++++++++++++++----- tests/sqlparser_common.rs | 21 +++++++++++++++++++++ 4 files changed, 66 insertions(+), 5 deletions(-) diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 2d75c46fa..c9f5eb2e9 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -49,6 +49,9 @@ pub enum BinaryOperator { Or, Like, NotLike, + BitwiseOr, + BitwiseAnd, + BitwiseXor, } impl fmt::Display for BinaryOperator { @@ -70,6 +73,9 @@ impl fmt::Display for BinaryOperator { BinaryOperator::Or => "OR", BinaryOperator::Like => "LIKE", BinaryOperator::NotLike => "NOT LIKE", + BinaryOperator::BitwiseOr => "|", + BinaryOperator::BitwiseAnd => "&", + BinaryOperator::BitwiseXor => "^", }) } } diff --git a/src/parser.rs b/src/parser.rs index 00dd24948..0a50d2d75 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -578,6 +578,9 @@ impl Parser { Token::Mult => Some(BinaryOperator::Multiply), Token::Mod => Some(BinaryOperator::Modulus), Token::StringConcat => Some(BinaryOperator::StringConcat), + Token::Pipe => Some(BinaryOperator::BitwiseOr), + Token::Caret => Some(BinaryOperator::BitwiseXor), + Token::Ampersand => Some(BinaryOperator::BitwiseAnd), Token::Div => Some(BinaryOperator::Divide), Token::Word(ref k) => match k.keyword.as_ref() { "AND" => Some(BinaryOperator::And), @@ -708,6 +711,9 @@ impl Parser { Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq => { Ok(20) } + Token::Pipe => Ok(21), + Token::Caret => Ok(22), + Token::Ampersand => Ok(23), Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC), Token::Mult | Token::Div | Token::Mod | Token::StringConcat => Ok(40), Token::DoubleColon => Ok(50), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index f3504ffb6..06c52c2c5 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -84,8 +84,12 
@@ pub enum Token { LBracket, /// Right bracket `]` RBracket, - /// Ampersand & + /// Ampersand `&` Ampersand, + /// Pipe `|` + Pipe, + /// Caret `^` + Caret, /// Left brace `{` LBrace, /// Right brace `}` @@ -125,6 +129,8 @@ impl fmt::Display for Token { Token::LBracket => f.write_str("["), Token::RBracket => f.write_str("]"), Token::Ampersand => f.write_str("&"), + Token::Caret => f.write_str("^"), + Token::Pipe => f.write_str("|"), Token::LBrace => f.write_str("{"), Token::RBrace => f.write_str("}"), } @@ -381,10 +387,8 @@ impl<'a> Tokenizer<'a> { chars.next(); // consume the '|' match chars.peek() { Some('|') => self.consume_and_return(chars, Token::StringConcat), - _ => Err(TokenizerError(format!( - "Expecting to see `||`. Bitwise or operator `|` is not supported. \nError at Line: {}, Col: {}", - self.line, self.col - ))), + // Bitshift '|' operator + _ => Ok(Some(Token::Pipe)), } } '=' => self.consume_and_return(chars, Token::Eq), @@ -426,6 +430,7 @@ impl<'a> Tokenizer<'a> { '[' => self.consume_and_return(chars, Token::LBracket), ']' => self.consume_and_return(chars, Token::RBracket), '&' => self.consume_and_return(chars, Token::Ampersand), + '^' => self.consume_and_return(chars, Token::Caret), '{' => self.consume_and_return(chars, Token::LBrace), '}' => self.consume_and_return(chars, Token::RBrace), other => self.consume_and_return(chars, Token::Char(other)), @@ -594,6 +599,29 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_bitwise_op() { + let sql = String::from("SELECT one | two ^ three"); + let dialect = GenericDialect {}; + let mut tokenizer = Tokenizer::new(&dialect, &sql); + let tokens = tokenizer.tokenize().unwrap(); + + let expected = vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::make_word("one", None), + Token::Whitespace(Whitespace::Space), + Token::Pipe, + Token::Whitespace(Whitespace::Space), + Token::make_word("two", None), + Token::Whitespace(Whitespace::Space), + Token::Caret, + Token::Whitespace(Whitespace::Space), + Token::make_word("three", None), + ]; + + compare(expected, tokens); + } #[test] fn tokenize_simple_select() { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 34f8c589a..1e1c54e10 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -680,6 +680,27 @@ fn parse_string_agg() { ); } +#[test] +fn parse_bitwise_ops() { + let bitwise_ops = &[ + ("^", BinaryOperator::BitwiseXor), + ("|", BinaryOperator::BitwiseOr), + ("&", BinaryOperator::BitwiseAnd), + ]; + + for (str_op, op) in bitwise_ops { + let select = verified_only_select(&format!("SELECT a {} b", &str_op)); + assert_eq!( + SelectItem::UnnamedExpr(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("a"))), + op: op.clone(), + right: Box::new(Expr::Identifier(Ident::new("b"))), + }), + select.projection[0] + ); + } +} + #[test] fn parse_between() { fn chk(negated: bool) { From d32df527e68dd76d857f47ea051a3ec22138469b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Wed, 3 Jun 2020 22:31:41 +0200 Subject: [PATCH 025/122] Accept &str in `Parse::parse_sql` (#182) It is more generic to accept a `&str` than a `String` in an API, and avoids having to convert a string to a `String` when not needed, avoiding a copy. 
--- README.md | 4 ++-- examples/cli.rs | 2 +- examples/parse_select.rs | 2 +- src/lib.rs | 2 +- src/parser.rs | 2 +- src/test_utils.rs | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 3b277a96b..24674d911 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ let sql = "SELECT a, b, 123, myfunc(b) \ let dialect = GenericDialect {}; // or AnsiDialect, or your own dialect ... -let ast = Parser::parse_sql(&dialect, sql.to_string()).unwrap(); +let ast = Parser::parse_sql(&dialect, sql).unwrap(); println!("AST: {:?}", ast); ``` @@ -122,4 +122,4 @@ resources. [LocustDB]: https://github.com/cswinter/LocustDB [Pratt Parser]: https://tdop.github.io/ [sql-2016-grammar]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html -[sql-standard]: https://en.wikipedia.org/wiki/ISO/IEC_9075 \ No newline at end of file +[sql-standard]: https://en.wikipedia.org/wiki/ISO/IEC_9075 diff --git a/examples/cli.rs b/examples/cli.rs index 917629e43..2b0822584 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -45,7 +45,7 @@ fn main() { chars.next(); chars.as_str() }; - let parse_result = Parser::parse_sql(&*dialect, without_bom.to_owned()); + let parse_result = Parser::parse_sql(&*dialect, without_bom); match parse_result { Ok(statements) => { println!( diff --git a/examples/parse_select.rs b/examples/parse_select.rs index 539d91652..e7aa16307 100644 --- a/examples/parse_select.rs +++ b/examples/parse_select.rs @@ -23,7 +23,7 @@ fn main() { let dialect = GenericDialect {}; - let ast = Parser::parse_sql(&dialect, sql.to_string()).unwrap(); + let ast = Parser::parse_sql(&dialect, sql).unwrap(); println!("AST: {:?}", ast); } diff --git a/src/lib.rs b/src/lib.rs index 156e20eb8..d25b24997 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,7 +28,7 @@ //! WHERE a > b AND b < 100 \ //! ORDER BY a DESC, b"; //! -//! let ast = Parser::parse_sql(&dialect, sql.to_string()).unwrap(); +//! let ast = Parser::parse_sql(&dialect, sql).unwrap(); //! //! println!("AST: {:?}", ast); //! 
``` diff --git a/src/parser.rs b/src/parser.rs index 0a50d2d75..9a22f4d19 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -82,7 +82,7 @@ impl Parser { } /// Parse a SQL statement and produce an Abstract Syntax Tree (AST) - pub fn parse_sql(dialect: &dyn Dialect, sql: String) -> Result, ParserError> { + pub fn parse_sql(dialect: &dyn Dialect, sql: &str) -> Result, ParserError> { let mut tokenizer = Tokenizer::new(dialect, &sql); let tokens = tokenizer.tokenize()?; let mut parser = Parser::new(tokens); diff --git a/src/test_utils.rs b/src/test_utils.rs index d36eeb044..4d4d35616 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -58,9 +58,9 @@ impl TestedDialects { } pub fn parse_sql_statements(&self, sql: &str) -> Result, ParserError> { - self.one_of_identical_results(|dialect| Parser::parse_sql(dialect, sql.to_string())) + self.one_of_identical_results(|dialect| Parser::parse_sql(dialect, &sql)) // To fail the `ensure_multiple_dialects_are_tested` test: - // Parser::parse_sql(&**self.dialects.first().unwrap(), sql.to_string()) + // Parser::parse_sql(&**self.dialects.first().unwrap(), sql) } /// Ensures that `sql` parses as a single statement, optionally checking From 6e6fae73a0aa368ae7a452855ae5fcca2b5cbc3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Sun, 7 Jun 2020 15:46:55 +0200 Subject: [PATCH 026/122] Add benchmarks using cargo bench / criterion (#190) --- docs/benchmarking.md | 6 +++ sqlparser_bench/Cargo.toml | 17 +++++++++ sqlparser_bench/benches/sqlparser_bench.rs | 43 ++++++++++++++++++++++ sqlparser_bench/src/lib.rs | 7 ++++ 4 files changed, 73 insertions(+) create mode 100644 docs/benchmarking.md create mode 100644 sqlparser_bench/Cargo.toml create mode 100644 sqlparser_bench/benches/sqlparser_bench.rs create mode 100644 sqlparser_bench/src/lib.rs diff --git a/docs/benchmarking.md b/docs/benchmarking.md new file mode 100644 index 000000000..feae53c84 --- /dev/null +++ b/docs/benchmarking.md @@ -0,0 +1,6 @@ +# Benchmarking + +Run `cargo bench` in the project `sqlparser_bench` execute the queries. +It will report results using the `criterion` library to perform the benchmarking. + +The bench project lives in another crate, to avoid the negative impact on building the `sqlparser` crate. diff --git a/sqlparser_bench/Cargo.toml b/sqlparser_bench/Cargo.toml new file mode 100644 index 000000000..43ea11913 --- /dev/null +++ b/sqlparser_bench/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "sqlparser_bench" +version = "0.1.0" +authors = ["Dandandan "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +sqlparser = { path = "../", version = "0.5.1-alpha-0" } + +[dev-dependencies] +criterion = {version = "0.3"} + +[[bench]] +name = "sqlparser_bench" +harness = false diff --git a/sqlparser_bench/benches/sqlparser_bench.rs b/sqlparser_bench/benches/sqlparser_bench.rs new file mode 100644 index 000000000..5293c0f50 --- /dev/null +++ b/sqlparser_bench/benches/sqlparser_bench.rs @@ -0,0 +1,43 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use criterion::{criterion_group, criterion_main, Criterion}; +use sqlparser::dialect::GenericDialect; +use sqlparser::parser::Parser; + +fn basic_queries(c: &mut Criterion) { + let mut group = c.benchmark_group("sqlparser-rs parsing benchmark"); + let dialect = GenericDialect {}; + + let string = "SELECT * FROM table WHERE 1 = 1"; + group.bench_function("sqlparser::select", |b| { + b.iter(|| Parser::parse_sql(&dialect, string)); + }); + + let with_query = " + WITH derived AS ( + SELECT MAX(a) AS max_a, + COUNT(b) AS b_num, + user_id + FROM TABLE + GROUP BY user_id + ) + SELECT * FROM table + LEFT JOIN derived USING (user_id) + "; + group.bench_function("sqlparser::with_select", |b| { + b.iter(|| Parser::parse_sql(&dialect, with_query)); + }); +} + +criterion_group!(benches, basic_queries); +criterion_main!(benches); diff --git a/sqlparser_bench/src/lib.rs b/sqlparser_bench/src/lib.rs new file mode 100644 index 000000000..31e1bb209 --- /dev/null +++ b/sqlparser_bench/src/lib.rs @@ -0,0 +1,7 @@ +#[cfg(test)] +mod tests { + #[test] + fn it_works() { + assert_eq!(2 + 2, 4); + } +} From af54eb02b25c418504264d0bd23e66d19712ff4b Mon Sep 17 00:00:00 2001 From: Max Countryman Date: Sun, 7 Jun 2020 10:15:31 -0700 Subject: [PATCH 027/122] Rework github actions, add code coverage (#186) This reworks our GitHub Actions workflow to include code coverage via tarpaulin. Fixes #164. --- .github/workflows/rust.yml | 79 +++++++++++++++++++++++++++----------- 1 file changed, 57 insertions(+), 22 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 06db11ebf..a53e0babc 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -1,30 +1,65 @@ name: Rust -on: [push] +on: [push, pull_request] jobs: - build: + + codestyle: runs-on: ubuntu-latest + steps: + - name: Set up Rust + uses: hecrj/setup-rust-action@v1 + with: + components: rustfmt + # Note that `nightly` is required for `license_template_path`, as + # it's an unstable feature. + rust-version: nightly + - uses: actions/checkout@v2 + - run: cargo fmt -- --check --config-path <(echo 'license_template_path = "HEADER"') + + lint: + runs-on: ubuntu-latest + steps: + - name: Set up Rust + uses: hecrj/setup-rust-action@v1 + with: + components: clippy + - uses: actions/checkout@v2 + - run: cargo clippy --all-targets --all-features -- -D warnings + compile: + runs-on: ubuntu-latest + steps: + - name: Set up Rust + uses: hecrj/setup-rust-action@v1 + - uses: actions/checkout@master + - run: cargo check --all-targets --all-features + + test: + strategy: + matrix: + rust: [stable, beta, nightly] + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 - name: Setup Rust - run: | - rustup toolchain install nightly --profile default - rustup toolchain install stable - rustup override set stable - # Clippy must be run first, as its lints are only triggered during - # compilation. Put another way: after a successful `cargo build`, `cargo - # clippy` is guaranteed to produce no results. This bug is known upstream: - # https://github.com/rust-lang/rust-clippy/issues/2604. 
-# - name: Clippy -# run: cargo clippy -- --all-targets --all-features -- -D warnings - - name: Check formatting - run: | - cargo +nightly fmt -- --check --config-path <(echo 'license_template_path = "HEADER"') - - name: Build - run: cargo build --verbose - - name: Run tests - run: cargo test --verbose - - name: Run tests for all features - run: cargo test --verbose -- all-features + uses: hecrj/setup-rust-action@v1 + with: + rust-version: ${{ matrix.rust }} + - name: Install Tarpaulin + uses: actions-rs/install@v0.1 + with: + crate: cargo-tarpaulin + version: 0.13.3 + use-tool-cache: true + - name: Checkout + uses: actions/checkout@v2 + - name: Test + run: cargo test --all-features + - name: Coverage + if: matrix.rust == 'stable' + run: cargo tarpaulin -o Lcov --output-dir ./coverage + - name: Coveralls + if: matrix.rust == 'stable' + uses: coverallsapp/github-action@master + with: + github-token: ${{ secrets.GITHUB_TOKEN }} From a42121de522941a82a2b89eb5ac662822964f151 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Sun, 7 Jun 2020 19:25:10 +0200 Subject: [PATCH 028/122] Use binary search to speed up matching keywords (#191) --- src/dialect/keywords.rs | 29 +++++++++++++++-------------- src/tokenizer.rs | 7 +++---- tests/sqlparser_common.rs | 9 +++++++++ 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index ee59a1c92..bb4f678e0 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -49,11 +49,11 @@ macro_rules! define_keywords { } } +// The following keywords should be sorted to be able to match using binary search define_keywords!( ABS, ACTION, ADD, - ASC, ALL, ALLOCATE, ALTER, @@ -65,6 +65,7 @@ define_keywords!( ARRAY_AGG, ARRAY_MAX_CARDINALITY, AS, + ASC, ASENSITIVE, ASYMMETRIC, AT, @@ -93,9 +94,9 @@ define_keywords!( CEILING, CHAIN, CHAR, - CHAR_LENGTH, CHARACTER, CHARACTER_LENGTH, + CHAR_LENGTH, CHECK, CLOB, CLOSE, @@ -158,6 +159,7 @@ define_keywords!( ELEMENT, ELSE, END, + END_EXEC = "END-EXEC", END_FRAME, END_PARTITION, EQUALS, @@ -175,8 +177,8 @@ define_keywords!( FALSE, FETCH, FIELDS, - FIRST, FILTER, + FIRST, FIRST_VALUE, FLOAT, FLOOR, @@ -255,8 +257,8 @@ define_keywords!( NATURAL, NCHAR, NCLOB, - NEXT, NEW, + NEXT, NO, NONE, NORMALIZE, @@ -268,8 +270,8 @@ define_keywords!( NULLS, NUMERIC, OBJECT, - OCTET_LENGTH, OCCURRENCES_REGEX, + OCTET_LENGTH, OF, OFFSET, OLD, @@ -285,12 +287,12 @@ define_keywords!( OVERLAPS, OVERLAY, PARAMETER, - PARTITION, PARQUET, + PARTITION, PERCENT, - PERCENT_RANK, PERCENTILE_CONT, PERCENTILE_DISC, + PERCENT_RANK, PERIOD, PORTION, POSITION, @@ -332,8 +334,8 @@ define_keywords!( ROLLBACK, ROLLUP, ROW, - ROW_NUMBER, ROWS, + ROW_NUMBER, SAVEPOINT, SCHEMA, SCOPE, @@ -390,10 +392,10 @@ define_keywords!( TRANSLATION, TREAT, TRIGGER, - TRUNCATE, TRIM, TRIM_ARRAY, TRUE, + TRUNCATE, UESCAPE, UNBOUNDED, UNCOMMITTED, @@ -409,11 +411,11 @@ define_keywords!( VALUE, VALUES, VALUE_OF, - VAR_POP, - VAR_SAMP, VARBINARY, VARCHAR, VARYING, + VAR_POP, + VAR_SAMP, VERSIONING, VIEW, WHEN, @@ -424,11 +426,10 @@ define_keywords!( WITH, WITHIN, WITHOUT, - WRITE, WORK, + WRITE, YEAR, - ZONE, - END_EXEC = "END-EXEC" + ZONE ); /// These keywords can't be used as a table alias, so that `FROM table_name alias` diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 06c52c2c5..910f73911 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -143,10 +143,9 @@ impl Token { } pub fn make_word(word: &str, quote_style: Option) -> Self { let word_uppercase = word.to_uppercase(); - 
//TODO: need to reintroduce FnvHashSet at some point .. iterating over keywords is - // not fast but I want the simplicity for now while I experiment with pluggable - // dialects - let is_keyword = quote_style == None && ALL_KEYWORDS.contains(&word_uppercase.as_str()); + //TODO: validate use of a hashset (e.g. FnvHashSet) compared to using binary search + let is_keyword = + quote_style == None && ALL_KEYWORDS.binary_search(&word_uppercase.as_str()).is_ok(); Token::Word(Word { value: word.to_string(), quote_style, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1e1c54e10..a63d3d49f 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -21,6 +21,7 @@ use matches::assert_matches; use sqlparser::ast::*; +use sqlparser::dialect::keywords::ALL_KEYWORDS; use sqlparser::parser::*; use sqlparser::test_utils::{all_dialects, expr_from_projection, number, only}; @@ -2851,6 +2852,14 @@ fn parse_drop_index() { } } +#[test] +fn keywords_sorted() { + // assert!(ALL_KEYWORDS.is_sorted()) + let mut copy = Vec::from(ALL_KEYWORDS); + copy.sort(); + assert!(copy == ALL_KEYWORDS) +} + fn parse_sql_statements(sql: &str) -> Result, ParserError> { all_dialects().parse_sql_statements(sql) } From 10b0b7f884dfe027514d135b2ff547cb82fa5ebe Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 7 Jun 2020 20:43:44 +0300 Subject: [PATCH 029/122] Update CHANGELOG (#192) Also remove a comment with a trailing space, which rustfmt doesn't like --- CHANGELOG.md | 3 +++ src/tokenizer.rs | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60c25da18..ab27d94e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ Given that the parser produces a typed AST, any changes to the AST will technica Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented changes. ### Changed +- **`Parser::parse_sql` now accepts a `&str` instead of `String` (#182)** - thanks @Dandandan! - Change `Ident` (previously a simple `String`) to store the parsed (unquoted) `value` of the identifier and the `quote_style` separately (#143) - thanks @apparebit! - Support Snowflake's `FROM (table_name)` (#155) - thanks @eyalleshem! @@ -21,6 +22,8 @@ Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented - Support basic forms of `CREATE SCHEMA` and `DROP SCHEMA` (#173) - thanks @alex-dukhno! - Support `NULLS FIRST`/`LAST` in `ORDER BY` expressions (#176) - thanks @houqp! - Support `LISTAGG()` (#174) - thanks @maxcountryman! +- Support the string concatentation operator `||` (#178) - thanks @Dandandan! +- Support bitwise AND (`&`), OR (`|`), XOR (`^`) (#181) - thanks @Dandandan! ### Fixed - Report an error for unterminated string literals (#165) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 910f73911..1f33dd5dc 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -143,7 +143,6 @@ impl Token { } pub fn make_word(word: &str, quote_style: Option) -> Self { let word_uppercase = word.to_uppercase(); - //TODO: validate use of a hashset (e.g. 
FnvHashSet) compared to using binary search let is_keyword = quote_style == None && ALL_KEYWORDS.binary_search(&word_uppercase.as_str()).is_ok(); Token::Word(Word { From d842f495db6936f64b2738178a431f0ebd6c4e07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Wed, 10 Jun 2020 08:15:44 +0200 Subject: [PATCH 030/122] Add line and column number to TokenizerError (#194) Addresses https://github.com/andygrove/sqlparser-rs/issues/179 for tokenize errors --- src/parser.rs | 5 ++++- src/tokenizer.rs | 54 +++++++++++++++++++++++++++--------------------- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 9a22f4d19..780334a9b 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -49,7 +49,10 @@ use IsLateral::*; impl From for ParserError { fn from(e: TokenizerError) -> Self { - ParserError::TokenizerError(format!("{:?}", e)) + ParserError::TokenizerError(format!( + "{} at Line: {}, Column {}", + e.message, e.line, e.col + )) } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 1f33dd5dc..68ebfaf62 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -217,7 +217,11 @@ impl fmt::Display for Whitespace { /// Tokenizer error #[derive(Debug, PartialEq)] -pub struct TokenizerError(String); +pub struct TokenizerError { + pub message: String, + pub line: u64, + pub col: u64, +} /// SQL Tokenizer pub struct Tokenizer<'a> { @@ -331,10 +335,10 @@ impl<'a> Tokenizer<'a> { if chars.next() == Some(quote_end) { Ok(Some(Token::make_word(&s, Some(quote_start)))) } else { - Err(TokenizerError(format!( - "Expected close delimiter '{}' before EOF.", - quote_end - ))) + self.tokenizer_error( + format!("Expected close delimiter '{}' before EOF.", quote_end) + .as_str(), + ) } } // numbers @@ -395,10 +399,7 @@ impl<'a> Tokenizer<'a> { chars.next(); // consume match chars.peek() { Some('=') => self.consume_and_return(chars, Token::Neq), - _ => Err(TokenizerError(format!( - "Tokenizer Error at Line: {}, Col: {}", - self.line, self.col - ))), + _ => self.tokenizer_error("Expected to see '=' after '!' character"), } } '<' => { @@ -437,6 +438,14 @@ impl<'a> Tokenizer<'a> { } } + fn tokenizer_error(&self, message: &str) -> Result { + Err(TokenizerError { + message: message.to_string(), + col: self.col, + line: self.line, + }) + } + /// Tokenize an identifier or keyword, after the first char is already consumed. 
fn tokenize_word(&self, first_char: char, chars: &mut Peekable>) -> String { let mut s = first_char.to_string(); @@ -471,10 +480,7 @@ impl<'a> Tokenizer<'a> { } } } - Err(TokenizerError(format!( - "Unterminated string literal at Line: {}, Col: {}", - self.line, self.col - ))) + self.tokenizer_error("Unterminated string literal") } fn tokenize_multiline_comment( @@ -499,11 +505,7 @@ impl<'a> Tokenizer<'a> { s.push(ch); } } - None => { - break Err(TokenizerError( - "Unexpected EOF while in a multi-line comment".to_string(), - )); - } + None => break self.tokenizer_error("Unexpected EOF while in a multi-line comment"), } } } @@ -720,9 +722,11 @@ mod tests { let mut tokenizer = Tokenizer::new(&dialect, &sql); assert_eq!( tokenizer.tokenize(), - Err(TokenizerError( - "Unterminated string literal at Line: 1, Col: 8".to_string() - )) + Err(TokenizerError { + message: "Unterminated string literal".to_string(), + line: 1, + col: 8 + }) ); } @@ -843,9 +847,11 @@ mod tests { let mut tokenizer = Tokenizer::new(&dialect, &sql); assert_eq!( tokenizer.tokenize(), - Err(TokenizerError( - "Expected close delimiter '\"' before EOF.".to_string(), - )) + Err(TokenizerError { + message: "Expected close delimiter '\"' before EOF.".to_string(), + line: 1, + col: 1 + }) ); } From 846c52f4500a5b79d351f8f9e3166e2806508a55 Mon Sep 17 00:00:00 2001 From: Max Countryman Date: Tue, 9 Jun 2020 23:32:13 -0700 Subject: [PATCH 031/122] Allow omitting units after INTERVAL (#184) Alter INTERVAL to support postgres syntax This patch updates our INTERVAL implementation such that the Postgres and Redshfit variation of the syntax is supported: namely that 'leading field' is optional. Fixes #177. --- src/ast/value.rs | 16 +++++++--------- src/parser.rs | 17 +++++++++++++---- tests/sqlparser_common.rs | 29 ++++++++++++++++++++++++----- 3 files changed, 44 insertions(+), 18 deletions(-) diff --git a/src/ast/value.rs b/src/ast/value.rs index fe2870f95..fdcd238cd 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -37,7 +37,7 @@ pub enum Value { /// `TIMESTAMP '...'` literals Timestamp(String), /// INTERVAL literals, roughly in the following format: - /// `INTERVAL '' [ () ] + /// `INTERVAL '' [ [ () ] ] /// [ TO [ () ] ]`, /// e.g. `INTERVAL '123:45.67' MINUTE(3) TO SECOND(2)`. /// @@ -46,7 +46,7 @@ pub enum Value { /// so the user will have to reject intervals like `HOUR TO YEAR`. 
Interval { value: String, - leading_field: DateTimeField, + leading_field: Option, leading_precision: Option, last_field: Option, /// The seconds precision can be specified in SQL source as @@ -72,7 +72,7 @@ impl fmt::Display for Value { Value::Timestamp(v) => write!(f, "TIMESTAMP '{}'", escape_single_quote_string(v)), Value::Interval { value, - leading_field: DateTimeField::Second, + leading_field: Some(DateTimeField::Second), leading_precision: Some(leading_precision), last_field, fractional_seconds_precision: Some(fractional_seconds_precision), @@ -95,12 +95,10 @@ impl fmt::Display for Value { last_field, fractional_seconds_precision, } => { - write!( - f, - "INTERVAL '{}' {}", - escape_single_quote_string(value), - leading_field - )?; + write!(f, "INTERVAL '{}'", escape_single_quote_string(value))?; + if let Some(leading_field) = leading_field { + write!(f, " {}", leading_field)?; + } if let Some(leading_precision) = leading_precision { write!(f, " ({})", leading_precision)?; } diff --git a/src/parser.rs b/src/parser.rs index 780334a9b..7c136cc6e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -526,12 +526,21 @@ impl Parser { // Following the string literal is a qualifier which indicates the units // of the duration specified in the string literal. // - // Note that PostgreSQL allows omitting the qualifier, but we currently - // require at least the leading field, in accordance with the ANSI spec. - let leading_field = self.parse_date_time_field()?; + // Note that PostgreSQL allows omitting the qualifier, so we provide + // this more general implemenation. + let leading_field = match self.peek_token() { + Some(Token::Word(kw)) + if ["YEAR", "MONTH", "DAY", "HOUR", "MINUTE", "SECOND"] + .iter() + .any(|d| kw.keyword == *d) => + { + Some(self.parse_date_time_field()?) + } + _ => None, + }; let (leading_precision, last_field, fsec_precision) = - if leading_field == DateTimeField::Second { + if leading_field == Some(DateTimeField::Second) { // SQL mandates special syntax for `SECOND TO SECOND` literals. 
// Instead of // `SECOND [()] TO SECOND[()]` diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a63d3d49f..8ff6e7e9b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1459,7 +1459,7 @@ fn parse_literal_interval() { assert_eq!( &Expr::Value(Value::Interval { value: "1-1".into(), - leading_field: DateTimeField::Year, + leading_field: Some(DateTimeField::Year), leading_precision: None, last_field: Some(DateTimeField::Month), fractional_seconds_precision: None, @@ -1472,7 +1472,7 @@ fn parse_literal_interval() { assert_eq!( &Expr::Value(Value::Interval { value: "01:01.01".into(), - leading_field: DateTimeField::Minute, + leading_field: Some(DateTimeField::Minute), leading_precision: Some(5), last_field: Some(DateTimeField::Second), fractional_seconds_precision: Some(5), @@ -1485,7 +1485,7 @@ fn parse_literal_interval() { assert_eq!( &Expr::Value(Value::Interval { value: "1".into(), - leading_field: DateTimeField::Second, + leading_field: Some(DateTimeField::Second), leading_precision: Some(5), last_field: None, fractional_seconds_precision: Some(4), @@ -1498,7 +1498,7 @@ fn parse_literal_interval() { assert_eq!( &Expr::Value(Value::Interval { value: "10".into(), - leading_field: DateTimeField::Hour, + leading_field: Some(DateTimeField::Hour), leading_precision: None, last_field: None, fractional_seconds_precision: None, @@ -1511,7 +1511,7 @@ fn parse_literal_interval() { assert_eq!( &Expr::Value(Value::Interval { value: "10".into(), - leading_field: DateTimeField::Hour, + leading_field: Some(DateTimeField::Hour), leading_precision: Some(1), last_field: None, fractional_seconds_precision: None, @@ -1519,6 +1519,19 @@ fn parse_literal_interval() { expr_from_projection(only(&select.projection)), ); + let sql = "SELECT INTERVAL '1 DAY'"; + let select = verified_only_select(sql); + assert_eq!( + &Expr::Value(Value::Interval { + value: "1 DAY".into(), + leading_field: None, + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + }), + expr_from_projection(only(&select.projection)), + ); + let result = parse_sql_statements("SELECT INTERVAL '1' SECOND TO SECOND"); assert_eq!( ParserError::ParserError("Expected end of statement, found: SECOND".to_string()), @@ -1544,6 +1557,12 @@ fn parse_literal_interval() { verified_only_select("SELECT INTERVAL '1' HOUR TO MINUTE"); verified_only_select("SELECT INTERVAL '1' HOUR TO SECOND"); verified_only_select("SELECT INTERVAL '1' MINUTE TO SECOND"); + verified_only_select("SELECT INTERVAL '1 YEAR'"); + verified_only_select("SELECT INTERVAL '1 YEAR' AS one_year"); + one_statement_parses_to( + "SELECT INTERVAL '1 YEAR' one_year", + "SELECT INTERVAL '1 YEAR' AS one_year", + ); } #[test] From d9a7491d9a8f773d568b13c7b06d4f25a24e3892 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Wed, 10 Jun 2020 09:33:31 +0300 Subject: [PATCH 032/122] Various follow-ups to recent pushes - Update CHANGELOG - Update `.gitignore` for the build directory of the benchmark crate - Remove src/lib from the recently added benchmark crate per https://github.com/andygrove/sqlparser-rs/pull/190#pullrequestreview-425835379 --- .gitignore | 1 + CHANGELOG.md | 2 ++ sqlparser_bench/src/lib.rs | 7 ------- 3 files changed, 3 insertions(+), 7 deletions(-) delete mode 100644 sqlparser_bench/src/lib.rs diff --git a/.gitignore b/.gitignore index 46c66224f..dcc3cbd93 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Generated by Cargo # will have compiled files and executables /target/ +/sqlparser_bench/target/ # 
Remove Cargo.lock from gitignore if creating an executable, leave it for libraries # More information here http://doc.crates.io/guide.html#cargotoml-vs-cargolock diff --git a/CHANGELOG.md b/CHANGELOG.md index ab27d94e8..93cfe06b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented - **`Parser::parse_sql` now accepts a `&str` instead of `String` (#182)** - thanks @Dandandan! - Change `Ident` (previously a simple `String`) to store the parsed (unquoted) `value` of the identifier and the `quote_style` separately (#143) - thanks @apparebit! - Support Snowflake's `FROM (table_name)` (#155) - thanks @eyalleshem! +- Add line and column number to TokenizerError (#194) - thanks @Dandandan! +- Make the units keyword following `INTERVAL '...'` optional (#184) - thanks @maxcountryman! ### Added - Support MSSQL `TOP () [ PERCENT ] [ WITH TIES ]` (#150) - thanks @alexkyllo! diff --git a/sqlparser_bench/src/lib.rs b/sqlparser_bench/src/lib.rs deleted file mode 100644 index 31e1bb209..000000000 --- a/sqlparser_bench/src/lib.rs +++ /dev/null @@ -1,7 +0,0 @@ -#[cfg(test)] -mod tests { - #[test] - fn it_works() { - assert_eq!(2 + 2, 4); - } -} From 2f1015339aa9cd007994f8aea51ec96e62945066 Mon Sep 17 00:00:00 2001 From: Taehoon Moon Date: Wed, 10 Jun 2020 18:53:52 +0900 Subject: [PATCH 033/122] Add serde support to AST structs and enums (#196) Apply serde to AST structs and enums to be serializable/deserializable. serde support is optional, can be activated by feature named "serde". --- Cargo.toml | 3 ++- src/ast/data_type.rs | 3 +++ src/ast/ddl.rs | 8 ++++++++ src/ast/mod.rs | 22 ++++++++++++++++++++++ src/ast/operator.rs | 4 ++++ src/ast/query.rs | 20 ++++++++++++++++++++ src/ast/value.rs | 4 ++++ 7 files changed, 63 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 895f07f6a..3232e6a05 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,8 +19,9 @@ name = "sqlparser" path = "src/lib.rs" [dependencies] -bigdecimal = { version = "0.1.0", optional = true } +bigdecimal = { version = "0.1.0", features = ["serde"], optional = true } log = "0.4.5" +serde = { version = "1.0", features = ["derive"], optional = true } [dev-dependencies] simple_logger = "1.0.1" diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index cfbc2147d..fc8b98c55 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -11,10 +11,13 @@ // limitations under the License. use super::ObjectName; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; use std::fmt; /// SQL data types #[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum DataType { /// Fixed-length character type e.g. CHAR(10) Char(Option), diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 776927669..d7503ba77 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -13,10 +13,13 @@ //! AST types specific to CREATE/ALTER variants of [Statement] //! 
diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs
index cfbc2147d..fc8b98c55 100644
--- a/src/ast/data_type.rs
+++ b/src/ast/data_type.rs
@@ -11,10 +11,13 @@
 // limitations under the License.
 
 use super::ObjectName;
+#[cfg(feature = "serde")]
+use serde::{Deserialize, Serialize};
 use std::fmt;
 
 /// SQL data types
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum DataType {
     /// Fixed-length character type e.g. CHAR(10)
     Char(Option<u64>),
diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs
index 776927669..d7503ba77 100644
--- a/src/ast/ddl.rs
+++ b/src/ast/ddl.rs
@@ -13,10 +13,13 @@
 //! AST types specific to CREATE/ALTER variants of [Statement]
 //! (commonly referred to as Data Definition Language, or DDL)
 use super::{display_comma_separated, DataType, Expr, Ident, ObjectName};
+#[cfg(feature = "serde")]
+use serde::{Deserialize, Serialize};
 use std::fmt;
 
 /// An `ALTER TABLE` (`Statement::AlterTable`) operation
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum AlterTableOperation {
     /// `ADD <table_constraint>`
     AddConstraint(TableConstraint),
@@ -36,6 +39,7 @@ impl fmt::Display for AlterTableOperation {
 /// A table-level constraint, specified in a `CREATE TABLE` or an
 /// `ALTER TABLE ADD <constraint>` statement.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum TableConstraint {
     /// `[ CONSTRAINT <name> ] { PRIMARY KEY | UNIQUE } (<columns>)`
     Unique {
@@ -95,6 +99,7 @@ impl fmt::Display for TableConstraint {
 
 /// SQL column definition
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct ColumnDef {
     pub name: Ident,
     pub data_type: DataType,
@@ -129,6 +134,7 @@ impl fmt::Display for ColumnDef {
 /// non-constraint options, lumping them all together under the umbrella of
 /// "column options," and we allow any column option to be named.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct ColumnOptionDef {
     pub name: Option<Ident>,
     pub option: ColumnOption,
@@ -143,6 +149,7 @@ impl fmt::Display for ColumnOptionDef {
 /// `ColumnOption`s are modifiers that follow a column definition in a `CREATE
 /// TABLE` statement.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum ColumnOption {
     /// `NULL`
     Null,
@@ -220,6 +227,7 @@ fn display_constraint_name<'a>(name: &'a Option<Ident>) -> impl fmt::Display + 'a {
 ///
 /// Used in foreign key constraints in `ON UPDATE` and `ON DELETE` options.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum ReferentialAction {
     Restrict,
     Cascade,
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 2dbf42b29..c04fb0269 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -18,6 +18,8 @@ mod operator;
 mod query;
 mod value;
 
+#[cfg(feature = "serde")]
+use serde::{Deserialize, Serialize};
 use std::fmt;
 
 pub use self::data_type::DataType;
@@ -71,6 +73,7 @@ where
 
 /// An identifier, decomposed into its value or character data and the quote style.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct Ident {
     /// The value of the identifier without quotes.
     pub value: String,
@@ -127,6 +130,7 @@ impl fmt::Display for Ident {
 
 /// A name of a table, view, custom type, etc., possibly multi-part, i.e. db.schema.obj
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct ObjectName(pub Vec<Ident>);
 
 impl fmt::Display for ObjectName {
@@ -141,6 +145,7 @@ impl fmt::Display for ObjectName {
 /// (e.g. boolean vs string), so the caller must handle expressions of
 /// inappropriate type, like `WHERE 1` or `SELECT 1=1`, as necessary.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum Expr {
     /// Identifier e.g. table name or column name
     Identifier(Ident),
@@ -308,6 +313,7 @@ impl fmt::Display for Expr {
 
 /// A window specification (i.e. `OVER (PARTITION BY .. ORDER BY .. etc.)`)
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct WindowSpec {
     pub partition_by: Vec<Expr>,
     pub order_by: Vec<OrderByExpr>,
@@ -353,6 +359,7 @@ impl fmt::Display for WindowSpec {
 /// Note: The parser does not validate the specified bounds; the caller should
 /// reject invalid bounds like `ROWS UNBOUNDED FOLLOWING` before execution.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct WindowFrame {
     pub units: WindowFrameUnits,
     pub start_bound: WindowFrameBound,
@@ -364,6 +371,7 @@ pub struct WindowFrame
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum WindowFrameUnits {
     Rows,
     Range,
@@ -398,6 +406,7 @@ impl FromStr for WindowFrameUnits {
 
 /// Specifies [WindowFrame]'s `start_bound` and `end_bound`
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum WindowFrameBound {
     /// `CURRENT ROW`
     CurrentRow,
@@ -422,6 +431,7 @@ impl fmt::Display for WindowFrameBound {
 
 /// A top-level statement (SELECT, INSERT, CREATE, etc.)
 #[allow(clippy::large_enum_variant)]
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum Statement {
     /// SELECT
     Query(Box<Query>),
@@ -766,6 +776,7 @@ impl fmt::Display for Statement {
 
 /// SQL assignment `foo = expr` as used in SQLUpdate
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct Assignment {
     pub id: Ident,
     pub value: Expr,
@@ -779,6 +790,7 @@ impl fmt::Display for Assignment {
 
 /// A function call
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct Function {
     pub name: ObjectName,
     pub args: Vec<Expr>,
@@ -805,6 +817,7 @@ impl fmt::Display for Function {
 
 /// External table's available file format
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum FileFormat {
     TEXTFILE,
     SEQUENCEFILE,
@@ -856,6 +869,7 @@ impl FromStr for FileFormat {
 
 /// A `LISTAGG` invocation `LISTAGG( [ DISTINCT ] <expr>[, <separator> ] [ON OVERFLOW <on_overflow>] ) )
 /// [ WITHIN GROUP (ORDER BY <within_group1>[, ...] ) ]`
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct ListAgg {
     pub distinct: bool,
     pub expr: Box<Expr>,
@@ -892,6 +906,7 @@ impl fmt::Display for ListAgg {
 
 /// The `ON OVERFLOW` clause of a LISTAGG invocation
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum ListAggOnOverflow {
     /// `ON OVERFLOW ERROR`
     Error,
@@ -925,6 +940,7 @@ impl fmt::Display for ListAggOnOverflow {
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum ObjectType {
     Table,
     View,
@@ -944,6 +960,7 @@ impl fmt::Display for ObjectType {
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct SqlOption {
     pub name: Ident,
     pub value: Value,
@@ -956,6 +973,7 @@ impl fmt::Display for SqlOption {
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum TransactionMode {
     AccessMode(TransactionAccessMode),
     IsolationLevel(TransactionIsolationLevel),
@@ -972,6 +990,7 @@ impl fmt::Display for TransactionMode {
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum TransactionAccessMode {
     ReadOnly,
     ReadWrite,
@@ -988,6 +1007,7 @@ impl fmt::Display for TransactionAccessMode {
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum TransactionIsolationLevel {
     ReadUncommitted,
     ReadCommitted,
@@ -1008,6 +1028,7 @@ impl fmt::Display for TransactionIsolationLevel {
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum ShowStatementFilter {
     Like(String),
     Where(Expr),
@@ -1024,6 +1045,7 @@ impl fmt::Display for ShowStatementFilter {
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum SetVariableValue {
     Ident(Ident),
     Literal(Value),
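One practical aside before the remaining files: the `WindowFrame` doc comment above leaves frame-bound validation to the caller. A possible caller-side check could look like the sketch below; `validate_window_frame` is a hypothetical helper, and it assumes `end_bound` is the optional right bound and that a `None` quantity means `UNBOUNDED`, as modeled in this crate's AST.

use sqlparser::ast::{WindowFrame, WindowFrameBound};

// Hypothetical consumer-side check: the parser accepts any bounds, so a
// caller that executes queries should reject frames the standard forbids.
fn validate_window_frame(frame: &WindowFrame) -> Result<(), String> {
    // A frame may not start at UNBOUNDED FOLLOWING, the example called
    // out in the doc comment above...
    if frame.start_bound == WindowFrameBound::Following(None) {
        return Err("window frame cannot start at UNBOUNDED FOLLOWING".into());
    }
    // ...and, symmetrically, may not end at UNBOUNDED PRECEDING.
    if frame.end_bound == Some(WindowFrameBound::Preceding(None)) {
        return Err("window frame cannot end at UNBOUNDED PRECEDING".into());
    }
    Ok(())
}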
diff --git a/src/ast/operator.rs b/src/ast/operator.rs
index c9f5eb2e9..63e75eead 100644
--- a/src/ast/operator.rs
+++ b/src/ast/operator.rs
@@ -10,10 +10,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#[cfg(feature = "serde")]
+use serde::{Deserialize, Serialize};
 use std::fmt;
 
 /// Unary operators
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum UnaryOperator {
     Plus,
     Minus,
@@ -32,6 +35,7 @@ impl fmt::Display for UnaryOperator {
 
 /// Binary operators
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum BinaryOperator {
     Plus,
     Minus,
diff --git a/src/ast/query.rs b/src/ast/query.rs
index a5918f1a3..73477b126 100644
--- a/src/ast/query.rs
+++ b/src/ast/query.rs
@@ -11,10 +11,13 @@
 // limitations under the License.
 
 use super::*;
+#[cfg(feature = "serde")]
+use serde::{Deserialize, Serialize};
 
 /// The most complete variant of a `SELECT` query expression, optionally
 /// including `WITH`, `UNION` / other set operations, and `ORDER BY`.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct Query {
     /// WITH (common table expressions, or CTEs)
     pub ctes: Vec<Cte>,
@@ -55,6 +58,7 @@ impl fmt::Display for Query {
 
 /// A node in a tree, representing a "query body" expression, roughly:
 /// `SELECT ... [ {UNION|EXCEPT|INTERSECT} SELECT ...]`
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum SetExpr {
     /// Restricted SELECT .. FROM .. HAVING (no ORDER BY or set operations)
     Select(Box