Skip to content

Commit

Permalink
Add parse_multipart_identifier function to parser (#860)
Browse files Browse the repository at this point in the history
* Add parse_multipart_identifier function to parser

* Update doc for parse_multipart_identifier

* Fix conflict
  • Loading branch information
Jefffrey committed May 17, 2023
1 parent 482a3ad commit 4559d87
Showing 1 changed file with 167 additions and 0 deletions.
167 changes: 167 additions & 0 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4707,6 +4707,92 @@ impl<'a> Parser<'a> {
Ok(idents)
}

/// Parse a multipart identifier of the form `ident1[.identN]*`.
///
/// Similar in functionality to [parse_identifiers], with the difference
/// being that this function is much more strict about parsing a valid
/// multipart identifier: every part must be a word, parts must be separated
/// by a single period, and the identifier must be followed directly by the
/// end of input. Any extraneous token makes the parse fail.
///
/// For example:
///
/// ```rust
/// use sqlparser::ast::Ident;
/// use sqlparser::dialect::GenericDialect;
/// use sqlparser::parser::Parser;
///
/// let dialect = GenericDialect {};
/// let expected = vec![Ident::new("one"), Ident::new("two")];
///
/// // expected usage
/// let sql = "one.two";
/// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap();
/// let actual = parser.parse_multipart_identifier().unwrap();
/// assert_eq!(&actual, &expected);
///
/// // parse_identifiers is more loose on what it allows, parsing successfully
/// let sql = "one + two";
/// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap();
/// let actual = parser.parse_identifiers().unwrap();
/// assert_eq!(&actual, &expected);
///
/// // expected to strictly fail due to + separator
/// let sql = "one + two";
/// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap();
/// let actual = parser.parse_multipart_identifier().unwrap_err();
/// assert_eq!(
///     actual.to_string(),
///     "sql parser error: Unexpected token in identifier: +"
/// );
/// ```
///
/// # Errors
///
/// Returns [`ParserError::ParserError`] when:
///
/// * the very first token is the end of input
///   (`Empty input when parsing identifier`);
/// * the first token, or a token following an identifier part, is neither a
///   word, a period nor the end of input
///   (`Unexpected token in identifier: <token>`);
/// * a period is directly followed by the end of input
///   (`Trailing period in identifier`);
/// * a period is followed by anything other than a word
///   (`Unexpected token following period in identifier: <token>`).
///
/// [parse_identifiers]: Parser::parse_identifiers
pub fn parse_multipart_identifier(&mut self) -> Result<Vec<Ident>, ParserError> {
    let mut idents = vec![];

    // The identifier must start with at least one word.
    match self.next_token().token {
        Token::Word(w) => idents.push(w.to_ident()),
        Token::EOF => {
            return Err(ParserError::ParserError(
                "Empty input when parsing identifier".to_string(),
            ));
        }
        token => {
            return Err(ParserError::ParserError(format!(
                "Unexpected token in identifier: {token}"
            )));
        }
    }

    // Consume the optional `.ident` parts; only the end of input may
    // terminate the identifier.
    loop {
        match self.next_token().token {
            // A period is only valid when another identifier part follows it.
            Token::Period => match self.next_token().token {
                Token::Word(w) => idents.push(w.to_ident()),
                Token::EOF => {
                    return Err(ParserError::ParserError(
                        "Trailing period in identifier".to_string(),
                    ));
                }
                token => {
                    return Err(ParserError::ParserError(format!(
                        "Unexpected token following period in identifier: {token}"
                    )));
                }
            },
            Token::EOF => break,
            token => {
                return Err(ParserError::ParserError(format!(
                    "Unexpected token in identifier: {token}"
                )));
            }
        }
    }

    Ok(idents)
}

/// Parse a simple one-word identifier (possibly quoted, possibly a keyword)
pub fn parse_identifier(&mut self) -> Result<Ident, ParserError> {
let next_token = self.next_token();
Expand Down Expand Up @@ -7455,4 +7541,85 @@ mod tests {
))
);
}

#[test]
fn test_parse_multipart_identifier_positive() {
    // Well-formed multipart identifiers must parse into their individual parts.
    let dialect = TestedDialects {
        dialects: vec![Box::new(GenericDialect {})],
        options: None,
    };

    // A quoted middle part may contain periods, parentheses, whitespace and an
    // escaped double quote without being split into further parts.
    let quoted_sql = r#"CATALOG."F(o)o. ""bar".table"#;
    let quoted_parts = vec![
        Ident::new("CATALOG"),
        Ident {
            value: "F(o)o. \"bar".to_string(),
            quote_style: Some('"'),
        },
        Ident::new("table"),
    ];
    dialect.run_parser_method(quoted_sql, |parser| {
        let parsed = parser.parse_multipart_identifier().unwrap();
        assert_eq!(quoted_parts, parsed);
    });

    // Whitespace around the separating period is tolerated.
    let spaced_sql = "CATALOG . table";
    let spaced_parts = vec![Ident::new("CATALOG"), Ident::new("table")];
    dialect.run_parser_method(spaced_sql, |parser| {
        let parsed = parser.parse_multipart_identifier().unwrap();
        assert_eq!(spaced_parts, parsed);
    });
}

#[test]
fn test_parse_multipart_identifier_negative() {
    /// Asserts that every dialect rejects `input` with exactly `expected_err`.
    fn assert_rejected(input: &str, expected_err: &str) {
        all_dialects().run_parser_method(input, |parser| {
            let actual_err = parser.parse_multipart_identifier().unwrap_err();
            assert_eq!(actual_err.to_string(), expected_err);
        });
    }

    // (malformed input, rendered error message) pairs, one per failure mode.
    let cases = [
        ("", "sql parser error: Empty input when parsing identifier"),
        (
            "*schema.table",
            "sql parser error: Unexpected token in identifier: *",
        ),
        (
            "schema.table*",
            "sql parser error: Unexpected token in identifier: *",
        ),
        (
            "schema.table.",
            "sql parser error: Trailing period in identifier",
        ),
        (
            "schema.*",
            "sql parser error: Unexpected token following period in identifier: *",
        ),
    ];

    for (input, expected_err) in cases {
        assert_rejected(input, expected_err);
    }
}
}

0 comments on commit 4559d87

Please sign in to comment.