Skip to content

Commit

Permalink
Remove more RegexParsers
Browse files Browse the repository at this point in the history
  • Loading branch information
judahrand committed Jun 30, 2022
1 parent ed04460 commit b792eec
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 31 deletions.
126 changes: 99 additions & 27 deletions src/sqlfluff/dialects/dialect_snowflake.py
Expand Up @@ -31,6 +31,7 @@
StringLexer,
StringParser,
SymbolSegment,
MultiStringParser,
)
from sqlfluff.dialects.dialect_snowflake_keywords import (
snowflake_reserved_keywords,
Expand Down Expand Up @@ -99,6 +100,55 @@
("exclude", "StartExcludeBracketSegment", "EndExcludeBracketSegment", True)
)

# Register every compression type Snowflake accepts under the dialect's
# "compression_types" label. Any existing entries are cleared first so that
# only the values listed here remain.
snowflake_dialect.sets("compression_types").clear()
snowflake_dialect.sets("compression_types").update(
    (
        "AUTO",
        "AUTO_DETECT",
        "GZIP",
        "BZ2",
        "BROTLI",
        "ZSTD",
        "DEFLATE",
        "RAW_DEFLATE",
        "LZO",
        "NONE",
        "SNAPPY",
    )
)

# Add all Snowflake supported file types.
# NOTE: the label must be "file_types" because that is the label the FileType
# parsers look up. Every entry also needs its own comma: adjacent string
# literals are implicitly concatenated, so `"ORC" "PARQUET"` would register the
# single bogus value "ORCPARQUET" and drop both real file types.
snowflake_dialect.sets("file_types").clear()
snowflake_dialect.sets("file_types").update(
    ["CSV", "JSON", "AVRO", "ORC", "PARQUET", "XML"],
)

# Register the warehouse sizes under the dialect's "warehouse_sizes" label,
# clearing any existing entries first.
snowflake_dialect.sets("warehouse_sizes").clear()
snowflake_dialect.sets("warehouse_sizes").update(
    (
        # Spellings without a dash.
        "XSMALL",
        "SMALL",
        "MEDIUM",
        "LARGE",
        "XLARGE",
        "XXLARGE",
        "X2LARGE",
        "XXXLARGE",
        "X3LARGE",
        "X4LARGE",
        "X5LARGE",
        "X6LARGE",
        # Spellings containing a dash.
        "X-SMALL",
        "X-LARGE",
        "2X-LARGE",
        "3X-LARGE",
        "4X-LARGE",
        "5X-LARGE",
        "6X-LARGE",
    )
)

snowflake_dialect.add(
# In snowflake, these are case sensitive even though they're not quoted
# so they need a different `name` and `type` so they're not picked up
Expand Down Expand Up @@ -149,26 +199,40 @@
),
# We use MultiStringParsers instead of keywords as some (those with dashes) require
# quotes:
WarehouseSize=RegexParser(
r"'?XSMALL'?|'?SMALL'?|'?MEDIUM'?|'?LARGE'?|'?XLARGE'?|'?XXLARGE'?|'?X2LARGE'?|"
r"'?XXXLARGE'?|'?X3LARGE'?|'?X4LARGE'?|'?X5LARGE|'?X6LARGE'?|"
r"'X-SMALL'|'X-LARGE'|'2X-LARGE'|'3X-LARGE'|'4X-LARGE'|'5X-LARGE'|'6X-LARGE'",
CodeSegment,
name="warehouse_size",
type="warehouse_size",
WarehouseSize=OneOf(
    # Unquoted form: only the sizes without a dash may be written bare.
    MultiStringParser(
        [
            size
            for size in snowflake_dialect.sets("warehouse_sizes")
            if "-" not in size
        ],
        CodeSegment,
        name="warehouse_size",
        type="warehouse_size",
    ),
    # Quoted form: every size may be quoted. This must NOT filter out the
    # dashed spellings (e.g. 'X-SMALL', '2X-LARGE'), since quoting is the only
    # way they can be written and they would otherwise never match.
    MultiStringParser(
        [f"'{size}'" for size in snowflake_dialect.sets("warehouse_sizes")],
        CodeSegment,
        name="warehouse_size",
        type="warehouse_size",
    ),
),
# We use MultiStringParsers instead of keywords as the arguments are optionally
# quoted.
CompressionType=OneOf(
RegexParser(
r"'(AUTO|AUTO_DETECT|GZIP|BZ2|BROTLI|ZSTD|DEFLATE|RAW_DEFLATE|LZO|NONE"
r"|SNAPPY)'",
MultiStringParser(
snowflake_dialect.sets("compression_types"),
CodeSegment,
name="compression_type",
type="keyword",
),
RegexParser(
r"(AUTO|AUTO_DETECT|GZIP|BZ2|BROTLI|ZSTD|DEFLATE|RAW_DEFLATE|LZO|NONE"
r"|SNAPPY)",
MultiStringParser(
[
f"'{compression}'"
for compression in snowflake_dialect.sets("compression_types")
],
CodeSegment,
name="compression_type",
type="keyword",
Expand Down Expand Up @@ -241,35 +305,43 @@
name="unquoted_file_path",
type="unquoted_file_path",
),
SnowflakeEncryptionOption=RegexParser(
r"'SNOWFLAKE_FULL'|'SNOWFLAKE_SSE'",
SnowflakeEncryptionOption=MultiStringParser(
["'SNOWFLAKE_FULL'", "'SNOWFLAKE_SSE'"],
CodeSegment,
name="snowflake_encryption_option",
type="stage_encryption_option",
),
S3EncryptionOption=RegexParser(
r"'AWS_CSE'|'AWS_SSE_S3'|'AWS_SSE_KMS'",
S3EncryptionOption=MultiStringParser(
["'AWS_CSE'", "'AWS_SSE_S3'", "'AWS_SSE_KMS'"],
CodeSegment,
name="s3_encryption_option",
type="stage_encryption_option",
),
GCSEncryptionOption=RegexParser(
r"'GCS_SSE_KMS'",
GCSEncryptionOption=StringParser(
"'GCS_SSE_KMS'",
CodeSegment,
name="gcs_encryption_option",
type="stage_encryption_option",
),
AzureBlobStorageEncryptionOption=RegexParser(
r"'AZURE_CSE'",
AzureBlobStorageEncryptionOption=StringParser(
"'AZURE_CSE'",
CodeSegment,
name="azure_blob_storage_encryption_option",
type="stage_encryption_option",
),
FileType=RegexParser(
r"'?CSV'?|'?JSON'?|'?AVRO'?|'?ORC'?|'?PARQUET'?|'?XML'?",
CodeSegment,
name="file_type",
type="file_type",
FileType=OneOf(
MultiStringParser(
snowflake_dialect.sets("file_types"),
CodeSegment,
name="file_type",
type="file_type",
),
MultiStringParser(
[f"'{file_type}'" for file_type in snowflake_dialect.sets("file_types")],
CodeSegment,
name="file_type",
type="file_type",
),
),
IntegerSegment=RegexParser(
# An unquoted integer that can be passed as an argument to Snowflake functions.
Expand Down
9 changes: 5 additions & 4 deletions src/sqlfluff/dialects/dialect_sparksql.py
Expand Up @@ -33,6 +33,7 @@
StartsWith,
RegexParser,
Matchable,
MultiStringParser,
)
from sqlfluff.core.parser.segments.raw import CodeSegment, KeywordSegment
from sqlfluff.dialects.dialect_sparksql_keywords import (
Expand Down Expand Up @@ -308,11 +309,11 @@
EqualsSegment_b=StringParser(
"<=>", SymbolSegment, name="equals", type="comparison_operator"
),
FileKeywordSegment=RegexParser(
"FILES?", KeywordSegment, name="file", type="file_keyword"
FileKeywordSegment=MultiStringParser(
["FILE", "FILES"], KeywordSegment, name="file", type="file_keyword"
),
JarKeywordSegment=RegexParser(
"JARS?", KeywordSegment, name="jar", type="file_keyword"
JarKeywordSegment=MultiStringParser(
["JAR", "JARS"], KeywordSegment, name="jar", type="file_keyword"
),
NoscanKeywordSegment=StringParser(
"NOSCAN", KeywordSegment, name="noscan_keyword", type="keyword"
Expand Down

0 comments on commit b792eec

Please sign in to comment.