diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8ab5681..88398ac 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,8 +5,6 @@ repos: rev: v1.16.23 hooks: - id: typos - # needed because .typos.toml is ignored see https://github.com/crate-ci/typos/issues/347 - exclude: CHANGELOG.md # formatters and linters are available in the virtualenv so they can be run from the makefile & vscode - repo: local hooks: diff --git a/fakesnow/transforms.py b/fakesnow/transforms.py index 9355f14..8aeff44 100644 --- a/fakesnow/transforms.py +++ b/fakesnow/transforms.py @@ -355,9 +355,13 @@ def regex_substr(expression: exp.Expression) -> exp.Expression: # which occurrence of the pattern to match try: - occurrence = expression.args["occurrence"] + occurrence = int(expression.args["occurrence"].this) except KeyError: - occurrence = exp.Literal(this="1", is_string=False) + occurrence = 1 + + # the duckdb dialect increments bracket (ie: index) expressions by 1 because duckdb is 1-indexed, + # so we need to compensate by subtracting 1 + occurrence = exp.Literal(this=str(occurrence - 1), is_string=False) try: regex_parameters_value = str(expression.args["parameters"].this) diff --git a/pyproject.toml b/pyproject.toml index 05598c6..64490b4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ dependencies = [ "duckdb~=0.9.2", "pyarrow", "snowflake-connector-python", - "sqlglot~=19.5.1", + "sqlglot~=20.4.0", ] [project.urls] diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 22515a6..bf0bc6b 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -150,7 +150,7 @@ def test_regex_substr() -> None: assert ( sqlglot.parse_one("SELECT regexp_substr(string1, 'the\\\\W+\\\\w+')", read="snowflake") .transform(regex_substr) - .sql() + .sql(dialect="duckdb") == "SELECT REGEXP_EXTRACT_ALL(string1[1 : ], 'the\\W+\\w+', 0, '')[1]" )