diff --git a/poetry.lock b/poetry.lock index f42e236e..74af111b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -483,20 +483,20 @@ files = [ [[package]] name = "sqlglot" -version = "30.6.0" +version = "30.7.0" description = "An easily customizable SQL parser and transpiler" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "sqlglot-30.6.0-py3-none-any.whl", hash = "sha256:e005fc2f47994f90d7d8df341f1cbe937518497b0b7b1507d4c03c4c9dfd2778"}, - {file = "sqlglot-30.6.0.tar.gz", hash = "sha256:246d34d39927422a50a3fa155f37b2f6346fba85f1a755b13c941eb32ef93361"}, + {file = "sqlglot-30.7.0-py3-none-any.whl", hash = "sha256:30421efcf3d57f95e57deaa755f65976c8b10735923f79986595dea912dc8206"}, + {file = "sqlglot-30.7.0.tar.gz", hash = "sha256:eaf90c7d61978ce98fb52718b7a578054bd0cebcc9ab6f3818ad4391ea9d6b69"}, ] [package.extras] -c = ["sqlglotc (==30.6.0)"] -dev = ["duckdb (>=0.6)", "pandas", "pandas-stubs", "pdoc", "pre-commit", "pyperf", "python-dateutil", "pytz", "ruff (==0.15.6)", "setuptools_scm", "sqlglot-mypy", "types-python-dateutil", "types-pytz", "typing_extensions"] -rs = ["sqlglotc (==30.6.0)", "sqlglotrs (==0.13.0)"] +c = ["sqlglotc (==30.7.0) ; python_version >= \"3.10\""] +dev = ["duckdb (>=0.6)", "mypy ; python_version < \"3.10\"", "pandas", "pandas-stubs", "pdoc", "pre-commit", "pyperf", "python-dateutil", "pytz", "ruff (==0.15.6)", "setuptools_scm", "sqlglot-mypy (>=1.20.0.post6) ; python_version >= \"3.10\"", "types-python-dateutil", "types-pytz", "typing_extensions"] +rs = ["sqlglotc (==30.7.0) ; python_version >= \"3.10\"", "sqlglotrs (==0.13.0)"] [[package]] name = "tomli" @@ -556,4 +556,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = "^3.10" -content-hash = "958b9c268d38b541599940e5de20b9f9283bbf11d523cc70f898941dee6f1b3f" +content-hash = "d14c2ddfc756ff38619c9f1d7f6090aee0c4bf02a074f486d87cc4d592d8aee0" diff --git a/pyproject.toml b/pyproject.toml index 426fc486..43ee0f35 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ packages = [ [tool.poetry.dependencies] python = "^3.10" -sqlglot = "^30.6.0" +sqlglot = "^30.7.0" [tool.poetry.group.dev.dependencies] coverage = {extras = ["toml"], version = "^7.13"} diff --git a/sql_metadata/comments.py b/sql_metadata/comments.py index 4291d3f2..eb6d14df 100644 --- a/sql_metadata/comments.py +++ b/sql_metadata/comments.py @@ -41,7 +41,11 @@ def _choose_tokenizer(sql: str) -> Tokenizer: if "#" in sql and not _has_hash_variables(sql): from sqlglot.dialects.mysql import MySQL - return MySQL.Tokenizer() + # Pass dialect="mysql" so the cached TokenizerCore (sqlglot >=30.7.0 + # caches per-class) is built with MySQL identifier semantics — without + # it the cache is primed from the default dialect and later mysql + # parses misclassify e.g. ``0020_big_table`` as NUMBER + VAR. + return MySQL.Tokenizer(dialect="mysql") return Tokenizer() @@ -167,7 +171,10 @@ def strip_comments_for_parsing(sql: str) -> str: else: from sqlglot.dialects.mysql import MySQL - tokenizer = MySQL.Tokenizer() + # See _choose_tokenizer — the explicit dialect prevents the + # sqlglot >=30.7.0 TokenizerCore cache from being primed with + # default-dialect semantics. + tokenizer = MySQL.Tokenizer(dialect="mysql") try: tokens = list(tokenizer.tokenize(sql)) except TokenError: