Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ packages = [

[tool.poetry.dependencies]
python = "^3.10"
sqlglot = "^30.6.0"
sqlglot = "^30.7.0"

[tool.poetry.group.dev.dependencies]
coverage = {extras = ["toml"], version = "^7.13"}
Expand Down
11 changes: 9 additions & 2 deletions sql_metadata/comments.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,11 @@ def _choose_tokenizer(sql: str) -> Tokenizer:
if "#" in sql and not _has_hash_variables(sql):
from sqlglot.dialects.mysql import MySQL

return MySQL.Tokenizer()
# Pass dialect="mysql" so the cached TokenizerCore (sqlglot >=30.7.0
# caches per-class) is built with MySQL identifier semantics — without
# it the cache is primed with the default dialect, and subsequent MySQL
# tokenization passes misclassify e.g. ``0020_big_table`` as NUMBER + VAR.
return MySQL.Tokenizer(dialect="mysql")
return Tokenizer()


Expand Down Expand Up @@ -167,7 +171,10 @@ def strip_comments_for_parsing(sql: str) -> str:
else:
from sqlglot.dialects.mysql import MySQL

tokenizer = MySQL.Tokenizer()
# See _choose_tokenizer — the explicit dialect prevents the
# sqlglot >=30.7.0 TokenizerCore cache from being primed with
# default-dialect semantics.
tokenizer = MySQL.Tokenizer(dialect="mysql")
try:
tokens = list(tokenizer.tokenize(sql))
except TokenError:
Expand Down