Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(parser): disallow empty object names #8171

Merged
merged 7 commits into from
Feb 26, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions e2e_test/ddl/schema.slt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ create schema ddl_schema;
statement error
create schema ddl_schema;

# Create a schema using a empty string.
statement error
create schema "";

# Create another schema with duplicated name and if not exists.
statement ok
create schema if not exists ddl_schema;
Expand Down
4 changes: 4 additions & 0 deletions e2e_test/ddl/table.slt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ explain select v1 from ddl_t;
statement error
create table ddl_t (v2 int);

# Create a table using a empty string.
statement error
create table ""(v2 int);

statement ok
create table if not exists ddl_t (v2 int);

Expand Down
6 changes: 3 additions & 3 deletions src/frontend/src/binder/relation/join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ impl Binder {
.indices_of
.iter()
.filter(|(s, _)| *s != "_row_id") // filter out `_row_id`
.map(|(s, idxes)| (Ident::new(s.to_owned()), idxes))
.map(|(s, idxes)| (Ident::new_safe(s.to_owned()), idxes))
.collect::<Vec<_>>();
columns.sort_by(|a, b| a.0.real_value().cmp(&b.0.real_value()));

Expand Down Expand Up @@ -213,12 +213,12 @@ impl Binder {
if indices.len() == 1 {
let right_table = context.columns[indices[0]].table_name.clone();
Ok(Expr::CompoundIdentifier(vec![
Ident::new(right_table),
Ident::new_safe(right_table),
column,
]))
} else if let Some(group_id) = context.column_group_context.mapping.get(&indices[0]) {
Ok(Expr::CompoundIdentifier(vec![
Ident::new(format!("{COLUMN_GROUP_PREFIX}{}", group_id)),
Ident::new_safe(format!("{COLUMN_GROUP_PREFIX}{}", group_id)),
column,
]))
} else {
Expand Down
106 changes: 74 additions & 32 deletions src/sqlparser/src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ pub use self::statement::*;
pub use self::value::{DateTimeField, TrimWhereField, Value};
use crate::keywords::Keyword;
use crate::parser::{Parser, ParserError};
use crate::tokenizer::Word;

pub struct DisplaySeparated<'a, T>
where
Expand Down Expand Up @@ -96,7 +97,10 @@ pub struct Ident {

impl Ident {
/// Create a new identifier with the given value and no quotes.
pub fn new<S>(value: S) -> Self
/// In situations like construct a identifier from existing object
/// names, we may prefer to use this 'cause the input string is
/// guanteed to be not empty.
pub fn new_safe<S>(value: S) -> Self
broccoliSpicy marked this conversation as resolved.
Show resolved Hide resolved
where
S: Into<String>,
{
Expand All @@ -106,19 +110,57 @@ impl Ident {
}
}

/// Create a new quoted identifier with the given quote and value. This function
/// panics if the given quote is not a valid quote character.
pub fn with_quote<S>(quote: char, value: S) -> Self
/// Create a new identifier from word
pub fn new_from_word(w: &Word) -> Result<Ident, ParserError> {
if w.value.is_empty() {
Err(ParserError::ParserError(format!(
"zero-length delimited identifier at or near \"{w}\""
)))
} else {
Ok(Ident {
value: w.value.clone(),
quote_style: w.quote_style,
})
}
}
broccoliSpicy marked this conversation as resolved.
Show resolved Hide resolved

/// Create a new quoted identifier with the given quote and value.
/// the inputs are guanteed to be legal.
pub fn new_with_quote_safe<S>(quote: char, value: S) -> Self
broccoliSpicy marked this conversation as resolved.
Show resolved Hide resolved
where
S: Into<String>,
{
assert!(quote == '\'' || quote == '"' || quote == '`' || quote == '[');
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can keep the assertion?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, if the function name is _unchecked, doesn't seem coherent to add assertions.

Ident {
value: value.into(),
quote_style: Some(quote),
}
}

/// Create a new quoted identifier with the given quote and value.
/// returns ParserError when the given string is empty or the given quote is illegal.
pub fn new_with_quote_check<S>(quote: char, value: S) -> Result<Ident, ParserError>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is this function used? I didn't find that

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shoud I remove this function? it might be useful, and I just found this asymmetry jarring defining only a unchecked function

where
S: Into<String>,
{
let value_str = value.into();
if value_str.is_empty() {
return Err(ParserError::ParserError(format!(
"zero-length delimited identifier at or near \"{value_str}\""
)));
}

if !(quote == '\'' || quote == '"' || quote == '`' || quote == '[') {
return Err(ParserError::ParserError(
"unexpected quote style".to_string(),
));
}

Ok(Ident {
value: value_str,
quote_style: Some(quote),
})
}

/// Value after considering quote style
/// In certain places, double quotes can force case-sensitive, but not always
/// e.g. session variables.
Expand Down Expand Up @@ -2224,27 +2266,27 @@ mod tests {
fn test_grouping_sets_display() {
// a and b in different group
let grouping_sets = Expr::GroupingSets(vec![
vec![Expr::Identifier(Ident::new("a"))],
vec![Expr::Identifier(Ident::new("b"))],
vec![Expr::Identifier(Ident::new_safe("a"))],
vec![Expr::Identifier(Ident::new_safe("b"))],
]);
assert_eq!("GROUPING SETS ((a), (b))", format!("{}", grouping_sets));

// a and b in the same group
let grouping_sets = Expr::GroupingSets(vec![vec![
Expr::Identifier(Ident::new("a")),
Expr::Identifier(Ident::new("b")),
Expr::Identifier(Ident::new_safe("a")),
Expr::Identifier(Ident::new_safe("b")),
]]);
assert_eq!("GROUPING SETS ((a, b))", format!("{}", grouping_sets));

// (a, b) and (c, d) in different group
let grouping_sets = Expr::GroupingSets(vec![
vec![
Expr::Identifier(Ident::new("a")),
Expr::Identifier(Ident::new("b")),
Expr::Identifier(Ident::new_safe("a")),
Expr::Identifier(Ident::new_safe("b")),
],
vec![
Expr::Identifier(Ident::new("c")),
Expr::Identifier(Ident::new("d")),
Expr::Identifier(Ident::new_safe("c")),
Expr::Identifier(Ident::new_safe("d")),
],
]);
assert_eq!(
Expand All @@ -2255,64 +2297,64 @@ mod tests {

#[test]
fn test_rollup_display() {
let rollup = Expr::Rollup(vec![vec![Expr::Identifier(Ident::new("a"))]]);
let rollup = Expr::Rollup(vec![vec![Expr::Identifier(Ident::new_safe("a"))]]);
assert_eq!("ROLLUP (a)", format!("{}", rollup));

let rollup = Expr::Rollup(vec![vec![
Expr::Identifier(Ident::new("a")),
Expr::Identifier(Ident::new("b")),
Expr::Identifier(Ident::new_safe("a")),
Expr::Identifier(Ident::new_safe("b")),
]]);
assert_eq!("ROLLUP ((a, b))", format!("{}", rollup));

let rollup = Expr::Rollup(vec![
vec![Expr::Identifier(Ident::new("a"))],
vec![Expr::Identifier(Ident::new("b"))],
vec![Expr::Identifier(Ident::new_safe("a"))],
vec![Expr::Identifier(Ident::new_safe("b"))],
]);
assert_eq!("ROLLUP (a, b)", format!("{}", rollup));

let rollup = Expr::Rollup(vec![
vec![Expr::Identifier(Ident::new("a"))],
vec![Expr::Identifier(Ident::new_safe("a"))],
vec![
Expr::Identifier(Ident::new("b")),
Expr::Identifier(Ident::new("c")),
Expr::Identifier(Ident::new_safe("b")),
Expr::Identifier(Ident::new_safe("c")),
],
vec![Expr::Identifier(Ident::new("d"))],
vec![Expr::Identifier(Ident::new_safe("d"))],
]);
assert_eq!("ROLLUP (a, (b, c), d)", format!("{}", rollup));
}

#[test]
fn test_cube_display() {
let cube = Expr::Cube(vec![vec![Expr::Identifier(Ident::new("a"))]]);
let cube = Expr::Cube(vec![vec![Expr::Identifier(Ident::new_safe("a"))]]);
assert_eq!("CUBE (a)", format!("{}", cube));

let cube = Expr::Cube(vec![vec![
Expr::Identifier(Ident::new("a")),
Expr::Identifier(Ident::new("b")),
Expr::Identifier(Ident::new_safe("a")),
Expr::Identifier(Ident::new_safe("b")),
]]);
assert_eq!("CUBE ((a, b))", format!("{}", cube));

let cube = Expr::Cube(vec![
vec![Expr::Identifier(Ident::new("a"))],
vec![Expr::Identifier(Ident::new("b"))],
vec![Expr::Identifier(Ident::new_safe("a"))],
vec![Expr::Identifier(Ident::new_safe("b"))],
]);
assert_eq!("CUBE (a, b)", format!("{}", cube));

let cube = Expr::Cube(vec![
vec![Expr::Identifier(Ident::new("a"))],
vec![Expr::Identifier(Ident::new_safe("a"))],
vec![
Expr::Identifier(Ident::new("b")),
Expr::Identifier(Ident::new("c")),
Expr::Identifier(Ident::new_safe("b")),
Expr::Identifier(Ident::new_safe("c")),
],
vec![Expr::Identifier(Ident::new("d"))],
vec![Expr::Identifier(Ident::new_safe("d"))],
]);
assert_eq!("CUBE (a, (b, c), d)", format!("{}", cube));
}

#[test]
fn test_array_index_display() {
let array_index = Expr::ArrayIndex {
obj: Box::new(Expr::Identifier(Ident::new("v1"))),
obj: Box::new(Expr::Identifier(Ident::new_safe("v1"))),
index: Box::new(Expr::Value(Value::Number("1".into()))),
};
assert_eq!("v1[1]", format!("{}", array_index));
Expand Down
40 changes: 19 additions & 21 deletions src/sqlparser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ pub enum ParserError {
}

// Use `Parser::expected` instead, if possible
#[macro_export]
macro_rules! parser_err {
($MSG:expr) => {
Err(ParserError::ParserError($MSG.to_string()))
Expand Down Expand Up @@ -282,7 +283,7 @@ impl Parser {
// Since there's no parenthesis, `w` must be a column or a table
// So what follows must be dot-delimited identifiers, e.g. `a.b.c.*`
let wildcard_expr = self.parse_simple_wildcard_expr(index)?;
return self.word_concat_wildcard_expr(w.to_ident(), wildcard_expr);
return self.word_concat_wildcard_expr(w.to_ident()?, wildcard_expr);
}
Token::Mul => {
return Ok(WildcardOrExpr::Wildcard);
Expand Down Expand Up @@ -374,7 +375,7 @@ impl Parser {
let mut id_parts = vec![];
while self.consume_token(&Token::Period) {
match self.next_token() {
Token::Word(w) => id_parts.push(w.to_ident()),
Token::Word(w) => id_parts.push(w.to_ident()?),
Token::Mul => {
return if id_parts.is_empty() {
Ok(WildcardOrExpr::Wildcard)
Expand Down Expand Up @@ -481,10 +482,10 @@ impl Parser {
// identifier, a function call, or a simple identifier:
_ => match self.peek_token() {
Token::LParen | Token::Period => {
let mut id_parts: Vec<Ident> = vec![w.to_ident()];
let mut id_parts: Vec<Ident> = vec![w.to_ident()?];
while self.consume_token(&Token::Period) {
match self.next_token() {
Token::Word(w) => id_parts.push(w.to_ident()),
Token::Word(w) => id_parts.push(w.to_ident()?),
unexpected => {
return self
.expected("an identifier or a '*' after '.'", unexpected);
Expand All @@ -499,7 +500,7 @@ impl Parser {
Ok(Expr::CompoundIdentifier(id_parts))
}
}
_ => Ok(Expr::Identifier(w.to_ident())),
_ => Ok(Expr::Identifier(w.to_ident()?)),
},
}, // End of Token::Word

Expand Down Expand Up @@ -600,7 +601,7 @@ impl Parser {
while self.consume_token(&Token::Period) {
match self.next_token() {
Token::Word(w) => {
idents.push(w.to_ident());
idents.push(w.to_ident()?);
}
unexpected => {
return self.expected("an identifier after '.'", unexpected);
Expand Down Expand Up @@ -2442,11 +2443,11 @@ impl Parser {
let token = self.peek_token();
match (self.parse_value(), token) {
(Ok(value), _) => Ok(SetVariableValue::Literal(value)),
(Err(_), Token::Word(ident)) => {
if ident.keyword == Keyword::DEFAULT {
(Err(_), Token::Word(w)) => {
if w.keyword == Keyword::DEFAULT {
Ok(SetVariableValue::Default)
} else {
Ok(SetVariableValue::Ident(ident.to_ident()))
Ok(SetVariableValue::Ident(w.to_ident()?))
}
}
(Err(_), unexpected) => self.expected("variable value", unexpected),
Expand Down Expand Up @@ -2504,7 +2505,7 @@ impl Parser {
match self.next_token() {
Token::Word(Word { value, keyword, .. }) if keyword == Keyword::NoKeyword => {
if self.peek_token() == Token::LParen {
return self.parse_function(ObjectName(vec![Ident::new(value)]));
return self.parse_function(ObjectName(vec![Ident::new_safe(value)]));
}
Ok(Expr::Value(Value::SingleQuotedString(value)))
}
Expand Down Expand Up @@ -2655,7 +2656,7 @@ impl Parser {
// (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword,
// not an alias.)
Token::Word(w) if after_as || (!reserved_kwds.contains(&w.keyword)) => {
Ok(Some(w.to_ident()))
Ok(Some(w.to_ident()?))
}
// MSSQL supports single-quoted strings as aliases for columns
// We accept them as table aliases too, although MSSQL does not.
Expand All @@ -2669,7 +2670,7 @@ impl Parser {
// character. When it sees such a <literal>, your DBMS will
// ignore the <separator> and treat the multiple strings as
// a single <literal>."
Token::SingleQuotedString(s) => Ok(Some(Ident::with_quote('\'', s))),
Token::SingleQuotedString(s) => Ok(Some(Ident::new_with_quote_safe('\'', s))),
not_an_ident => {
if after_as {
return self.expected("an identifier after AS", not_an_ident);
Expand Down Expand Up @@ -2720,7 +2721,7 @@ impl Parser {
break;
}

idents.push(w.to_ident());
idents.push(w.to_ident()?);
}
Token::EOF | Token::Eq => break,
_ => {}
Expand All @@ -2738,7 +2739,7 @@ impl Parser {
loop {
match self.next_token() {
Token::Word(w) => {
idents.push(w.to_ident());
idents.push(w.to_ident()?);
}
Token::EOF => break,
_ => {}
Expand All @@ -2751,7 +2752,7 @@ impl Parser {
/// Parse a simple one-word identifier (possibly quoted, possibly a keyword)
pub fn parse_identifier(&mut self) -> Result<Ident, ParserError> {
match self.next_token() {
Token::Word(w) => Ok(w.to_ident()),
Token::Word(w) => Ok(w.to_ident()?),
unexpected => self.expected("identifier", unexpected),
}
}
Expand All @@ -2762,7 +2763,7 @@ impl Parser {
Token::Word(w) => {
match keywords::RESERVED_FOR_COLUMN_OR_TABLE_NAME.contains(&w.keyword) {
true => parser_err!(format!("syntax error at or near \"{w}\"")),
false => Ok(w.to_ident()),
false => Ok(w.to_ident()?),
}
}
unexpected => self.expected("identifier", unexpected),
Expand Down Expand Up @@ -3995,11 +3996,8 @@ impl Parser {
}

impl Word {
pub fn to_ident(&self) -> Ident {
Ident {
value: self.value.clone(),
quote_style: self.quote_style,
}
pub fn to_ident(&self) -> Result<Ident, ParserError> {
Ident::new_from_word(self)
}
}

Expand Down