Skip to content

Commit

Permalink
Add support for char length units on small character string …
Browse files Browse the repository at this point in the history
…types. (#663)

This results in complete support for ANSI CHARACTER, CHAR, CHARACTER VARYING,
CHAR VARYING, and VARCHAR.
  • Loading branch information
AugustoFKL committed Oct 11, 2022
1 parent 7776726 commit cacdf33
Show file tree
Hide file tree
Showing 6 changed files with 244 additions and 34 deletions.
84 changes: 70 additions & 14 deletions src/ast/data_type.rs
Expand Up @@ -26,15 +26,15 @@ use super::value::escape_single_quote_string;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum DataType {
/// Fixed-length character type e.g. CHARACTER(10)
Character(Option<u64>),
Character(Option<CharacterLength>),
/// Fixed-length char type e.g. CHAR(10)
Char(Option<u64>),
Char(Option<CharacterLength>),
/// Character varying type e.g. CHARACTER VARYING(10)
CharacterVarying(Option<u64>),
CharacterVarying(Option<CharacterLength>),
/// Char varying type e.g. CHAR VARYING(10)
CharVarying(Option<u64>),
CharVarying(Option<CharacterLength>),
/// Variable-length character type e.g. VARCHAR(10)
Varchar(Option<u64>),
Varchar(Option<CharacterLength>),
/// Variable-length character type e.g. NVARCHAR(10)
Nvarchar(Option<u64>),
/// Uuid type
Expand Down Expand Up @@ -133,17 +133,14 @@ pub enum DataType {
impl fmt::Display for DataType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
DataType::Character(size) => {
format_type_with_optional_length(f, "CHARACTER", size, false)
}
DataType::Char(size) => format_type_with_optional_length(f, "CHAR", size, false),
DataType::Character(size) => format_character_string_type(f, "CHARACTER", size),
DataType::Char(size) => format_character_string_type(f, "CHAR", size),
DataType::CharacterVarying(size) => {
format_type_with_optional_length(f, "CHARACTER VARYING", size, false)
}
DataType::CharVarying(size) => {
format_type_with_optional_length(f, "CHAR VARYING", size, false)
format_character_string_type(f, "CHARACTER VARYING", size)
}
DataType::Varchar(size) => format_type_with_optional_length(f, "VARCHAR", size, false),

DataType::CharVarying(size) => format_character_string_type(f, "CHAR VARYING", size),
DataType::Varchar(size) => format_character_string_type(f, "VARCHAR", size),
DataType::Nvarchar(size) => {
format_type_with_optional_length(f, "NVARCHAR", size, false)
}
Expand Down Expand Up @@ -247,6 +244,18 @@ fn format_type_with_optional_length(
Ok(())
}

/// Writes a character string type name, followed by its length in parentheses
/// when one is given.
///
/// * `f` - formatter receiving the output.
/// * `sql_type` - the type name to write verbatim, e.g. `"CHARACTER VARYING"`.
/// * `size` - optional length; `Some` is rendered as `(<size>)` directly after
///   the type name, `None` writes the type name alone.
fn format_character_string_type(
    f: &mut fmt::Formatter,
    sql_type: &str,
    size: &Option<CharacterLength>,
) -> fmt::Result {
    match size {
        Some(length) => write!(f, "{}({})", sql_type, length),
        None => f.write_str(sql_type),
    }
}

/// Timestamp and Time data types information about TimeZone formatting.
///
/// This is more related to a display information than real differences between each variant. To
Expand Down Expand Up @@ -324,3 +333,50 @@ impl fmt::Display for ExactNumberInfo {
}
}
}

/// Information about a [character length][1]: the part written between the
/// parentheses of a character string type such as `CHARACTER(20 OCTETS)`,
/// made of a numeric length and an optional unit.
///
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-length
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct CharacterLength {
/// The numeric length as written in the SQL text. For the fixed-length types
/// (CHARACTER, CHAR) this is the length itself; for the VARYING / VARCHAR forms
/// it is presumably the maximum length.
/// NOTE(review): earlier wording said "default if VARYING, maximum if not",
/// which looks inverted — confirm against the standard.
pub length: u64,
/// Optional unit of `length`. `None` means no unit was written; ANSI then
/// implicitly treats the length as being in CHARACTERS.
pub unit: Option<CharLengthUnits>,
}

impl fmt::Display for CharacterLength {
    /// Writes the length, followed by a single space and the unit when a unit
    /// is present, e.g. `20` or `20 OCTETS`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self.unit {
            Some(unit) => write!(f, "{} {}", self.length, unit),
            None => write!(f, "{}", self.length),
        }
    }
}

/// Possible units for a character length, initially based on the 2016 ANSI
/// [standard][1].
///
/// The enum is fieldless, so it derives `Copy` in addition to `Clone`; values
/// can be passed and stored by value without explicit cloning.
///
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#char-length-units
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum CharLengthUnits {
    /// `CHARACTERS` unit
    Characters,
    /// `OCTETS` unit
    Octets,
}

impl fmt::Display for CharLengthUnits {
    /// Writes the SQL keyword for the unit: `CHARACTERS` or `OCTETS`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let keyword = match self {
            Self::Characters => "CHARACTERS",
            Self::Octets => "OCTETS",
        };
        f.write_str(keyword)
    }
}
6 changes: 3 additions & 3 deletions src/ast/mod.rs
Expand Up @@ -22,9 +22,9 @@ use core::fmt;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

pub use self::data_type::DataType;
pub use self::data_type::ExactNumberInfo;
pub use self::data_type::TimezoneInfo;
pub use self::data_type::{
CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
};
pub use self::ddl::{
AlterColumnOperation, AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef,
ReferentialAction, TableConstraint,
Expand Down
2 changes: 2 additions & 0 deletions src/keywords.rs
Expand Up @@ -123,6 +123,7 @@ define_keywords!(
CHANGE,
CHAR,
CHARACTER,
CHARACTERS,
CHARACTER_LENGTH,
CHARSET,
CHAR_LENGTH,
Expand Down Expand Up @@ -372,6 +373,7 @@ define_keywords!(
NVARCHAR,
OBJECT,
OCCURRENCES_REGEX,
OCTETS,
OCTET_LENGTH,
OF,
OFFSET,
Expand Down
156 changes: 145 additions & 11 deletions src/parser.rs
Expand Up @@ -3426,20 +3426,24 @@ impl<'a> Parser<'a> {
Ok(DataType::BigInt(optional_precision?))
}
}
Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_precision()?)),
Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_character_length()?)),
Keyword::NVARCHAR => Ok(DataType::Nvarchar(self.parse_optional_precision()?)),
Keyword::CHARACTER => {
if self.parse_keyword(Keyword::VARYING) {
Ok(DataType::CharacterVarying(self.parse_optional_precision()?))
Ok(DataType::CharacterVarying(
self.parse_optional_character_length()?,
))
} else {
Ok(DataType::Character(self.parse_optional_precision()?))
Ok(DataType::Character(self.parse_optional_character_length()?))
}
}
Keyword::CHAR => {
if self.parse_keyword(Keyword::VARYING) {
Ok(DataType::CharVarying(self.parse_optional_precision()?))
Ok(DataType::CharVarying(
self.parse_optional_character_length()?,
))
} else {
Ok(DataType::Char(self.parse_optional_precision()?))
Ok(DataType::Char(self.parse_optional_character_length()?))
}
}
Keyword::CLOB => Ok(DataType::Clob(self.parse_optional_precision()?)),
Expand Down Expand Up @@ -3680,6 +3684,31 @@ impl<'a> Parser<'a> {
}
}

/// Parses an optional, parenthesised character length such as `(20)` or
/// `(20 OCTETS)`.
///
/// Returns `Ok(None)` when `consume_token` does not find an opening
/// parenthesis. Otherwise the length is parsed with
/// [`parse_character_length`](Self::parse_character_length) and a closing
/// parenthesis is required.
///
/// # Errors
///
/// Propagates any error from parsing the length or from the missing `)`.
pub fn parse_optional_character_length(
    &mut self,
) -> Result<Option<CharacterLength>, ParserError> {
    if !self.consume_token(&Token::LParen) {
        return Ok(None);
    }

    let length_spec = self.parse_character_length()?;
    self.expect_token(&Token::RParen)?;
    Ok(Some(length_spec))
}

/// Parses a character length: an unsigned integer literal optionally followed
/// by the `CHARACTERS` or `OCTETS` keyword, e.g. `20` or `20 OCTETS`.
///
/// # Errors
///
/// Propagates the error from `parse_literal_uint` when the length cannot be
/// parsed.
pub fn parse_character_length(&mut self) -> Result<CharacterLength, ParserError> {
    let length = self.parse_literal_uint()?;

    // Probe the keywords in the same order as before: OCTETS is only tried
    // when CHARACTERS was not matched.
    let saw_characters = self.parse_keyword(Keyword::CHARACTERS);
    let saw_octets = !saw_characters && self.parse_keyword(Keyword::OCTETS);

    let unit = match (saw_characters, saw_octets) {
        (true, _) => Some(CharLengthUnits::Characters),
        (false, true) => Some(CharLengthUnits::Octets),
        (false, false) => None,
    };

    Ok(CharacterLength { length, unit })
}

pub fn parse_optional_precision_scale(
&mut self,
) -> Result<(Option<u64>, Option<u64>), ParserError> {
Expand Down Expand Up @@ -5337,7 +5366,9 @@ mod tests {

#[cfg(test)]
mod test_parse_data_type {
use crate::ast::{DataType, ExactNumberInfo, TimezoneInfo};
use crate::ast::{
CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
};
use crate::dialect::{AnsiDialect, GenericDialect};
use crate::test_utils::TestedDialects;

Expand All @@ -5360,21 +5391,124 @@ mod tests {

test_parse_data_type!(dialect, "CHARACTER", DataType::Character(None));

test_parse_data_type!(dialect, "CHARACTER(20)", DataType::Character(Some(20)));
test_parse_data_type!(
dialect,
"CHARACTER(20)",
DataType::Character(Some(CharacterLength {
length: 20,
unit: None
}))
);

test_parse_data_type!(
dialect,
"CHARACTER(20 CHARACTERS)",
DataType::Character(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Characters)
}))
);

test_parse_data_type!(
dialect,
"CHARACTER(20 OCTETS)",
DataType::Character(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Octets)
}))
);

test_parse_data_type!(dialect, "CHAR", DataType::Char(None));

test_parse_data_type!(dialect, "CHAR(20)", DataType::Char(Some(20)));
test_parse_data_type!(
dialect,
"CHAR(20)",
DataType::Char(Some(CharacterLength {
length: 20,
unit: None
}))
);

test_parse_data_type!(
dialect,
"CHAR(20 CHARACTERS)",
DataType::Char(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Characters)
}))
);

test_parse_data_type!(
dialect,
"CHAR(20 OCTETS)",
DataType::Char(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Octets)
}))
);

test_parse_data_type!(
dialect,
"CHARACTER VARYING(20)",
DataType::CharacterVarying(Some(20))
DataType::CharacterVarying(Some(CharacterLength {
length: 20,
unit: None
}))
);

test_parse_data_type!(
dialect,
"CHARACTER VARYING(20 CHARACTERS)",
DataType::CharacterVarying(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Characters)
}))
);

test_parse_data_type!(
dialect,
"CHARACTER VARYING(20 OCTETS)",
DataType::CharacterVarying(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Octets)
}))
);

test_parse_data_type!(dialect, "CHAR VARYING(20)", DataType::CharVarying(Some(20)));
test_parse_data_type!(
dialect,
"CHAR VARYING(20)",
DataType::CharVarying(Some(CharacterLength {
length: 20,
unit: None
}))
);

test_parse_data_type!(
dialect,
"CHAR VARYING(20 CHARACTERS)",
DataType::CharVarying(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Characters)
}))
);

test_parse_data_type!(dialect, "VARCHAR(20)", DataType::Varchar(Some(20)));
test_parse_data_type!(
dialect,
"CHAR VARYING(20 OCTETS)",
DataType::CharVarying(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Octets)
}))
);

test_parse_data_type!(
dialect,
"VARCHAR(20)",
DataType::Varchar(Some(CharacterLength {
length: 20,
unit: None
}))
);
}

#[test]
Expand Down
15 changes: 12 additions & 3 deletions tests/sqlparser_common.rs
Expand Up @@ -1945,7 +1945,10 @@ fn parse_create_table() {
vec![
ColumnDef {
name: "name".into(),
data_type: DataType::Varchar(Some(100)),
data_type: DataType::Varchar(Some(CharacterLength {
length: 100,
unit: None
})),
collation: None,
options: vec![ColumnOptionDef {
name: None,
Expand Down Expand Up @@ -2401,7 +2404,10 @@ fn parse_create_external_table() {
vec![
ColumnDef {
name: "name".into(),
data_type: DataType::Varchar(Some(100)),
data_type: DataType::Varchar(Some(CharacterLength {
length: 100,
unit: None
})),
collation: None,
options: vec![ColumnOptionDef {
name: None,
Expand Down Expand Up @@ -2469,7 +2475,10 @@ fn parse_create_or_replace_external_table() {
columns,
vec![ColumnDef {
name: "name".into(),
data_type: DataType::Varchar(Some(100)),
data_type: DataType::Varchar(Some(CharacterLength {
length: 100,
unit: None
})),
collation: None,
options: vec![ColumnOptionDef {
name: None,
Expand Down

0 comments on commit cacdf33

Please sign in to comment.