From 6e45b10280f8f7f6730c0c23211eda60b4c7629a Mon Sep 17 00:00:00 2001 From: Justin Joyce Date: Mon, 25 Jul 2022 15:44:11 +0100 Subject: [PATCH 1/4] BUG/#312 --- opteryx/engine/planner/temporal.py | 55 ++++++++++++++----------- opteryx/version.py | 2 +- tests/sql_battery/test_battery_shape.py | 6 +-- 3 files changed, 35 insertions(+), 28 deletions(-) diff --git a/opteryx/engine/planner/temporal.py b/opteryx/engine/planner/temporal.py index 8ac23ad9..8eff96ae 100644 --- a/opteryx/engine/planner/temporal.py +++ b/opteryx/engine/planner/temporal.py @@ -13,7 +13,7 @@ """ This compensates for missing temporal table support in the SQL parser. -For information on temporal tables see: +For information on temporal tables see: https://blog.devgenius.io/a-query-in-time-introduction-to-sql-server-temporal-tables-145ddb1355d9 This supports the following syntaxes @@ -53,13 +53,14 @@ r"WITH", ] +COMBINE_WHITESPACE_REGEX = re.compile(r"\s+") + def clean_statement(string): """ Remove carriage returns and all whitespace to single spaces """ - _RE_COMBINE_WHITESPACE = re.compile(r"\s+") - return _RE_COMBINE_WHITESPACE.sub(" ", string).strip().upper() + return COMBINE_WHITESPACE_REGEX.sub(" ", string).strip().upper() def sql_parts(string): @@ -110,42 +111,42 @@ def _subtract_one_month(in_date): def parse_range(fixed_range): fixed_range = fixed_range.upper() - TODAY = datetime.date.today() + today = datetime.datetime.utcnow().date() if fixed_range in ("PREVIOUS_MONTH", "LAST_MONTH"): # end the day before the first of this month - end = TODAY.replace(day=1) - datetime.timedelta(days=1) + end = today.replace(day=1) - datetime.timedelta(days=1) # start the first day of that month start = end.replace(day=1) elif fixed_range == "THIS_MONTH": # start the first day of this month - start = TODAY.replace(day=1) + start = today.replace(day=1) # end today - end = TODAY + end = today elif fixed_range in ("PREVIOUS_CYCLE", "LAST_CYCLE"): # if we're before the 21st - if TODAY.day < 22: + if today.day < 22: # end the 21st of last month - end = _subtract_one_month(TODAY).replace(day=21) + end = _subtract_one_month(today).replace(day=21) # start the 22nd of the month before start = _subtract_one_month(end).replace(day=22) else: # end the 21st of this month - end = TODAY.replace(day=21) + end = today.replace(day=21) # start the 22nd of the month before start = _subtract_one_month(end).replace(day=22) elif fixed_range == "THIS_CYCLE": # if we're before the 21st - if TODAY.day < 22: + if today.day < 22: # end today - end = TODAY + end = today # start the 22nd of last month - start = _subtract_one_month(TODAY).replace(day=22) + start = _subtract_one_month(today).replace(day=22) else: # end the today - end = TODAY + end = today # start the 22nd of this month - start = TODAY.replace(day=22) + start = today.replace(day=22) else: raise SqlError(f"Unknown temporal range `{fixed_range}`") @@ -156,12 +157,12 @@ def parse_range(fixed_range): def parse_date(date): date = date.upper() - TODAY = datetime.date.today() + today = datetime.datetime.utcnow().date() if date == "TODAY": - return TODAY + return today if date == "YESTERDAY": - return TODAY - datetime.timedelta(days=1) + return today - datetime.timedelta(days=1) parsed_date = dates.parse_iso(date[1:-1]) if parsed_date: @@ -175,10 +176,10 @@ def extract_temporal_filters(sql): clean_sql = clean_statement(clean_sql) parts = sql_parts(clean_sql) - TODAY = datetime.date.today() + today = datetime.datetime.utcnow().date() clearing_regex = None - start_date = TODAY - end_date = TODAY + start_date = today + end_date = today try: pos = parts.index("FOR") # this fails when there is no temporal clause @@ -196,6 +197,13 @@ def extract_temporal_filters(sql): start_date = parse_date(parts[2]) end_date = parse_date(parts[4]) + if start_date is None or end_date is None: + raise SqlError("Unrecognized temporal range values.") + if start_date > end_date: + raise SqlError( + "Invalid temporal range, start of range is after end of range." + ) + clearing_regex = ( r"(FOR[\n\r\s]+DATES[\n\r\s]+BETWEEN[\n\r\s]+" + parts[2] @@ -215,9 +223,8 @@ def extract_temporal_filters(sql): regex = re.compile(clearing_regex, re.MULTILINE | re.DOTALL | re.IGNORECASE) sql = regex.sub("\n-- FOR STATEMENT REMOVED\n", sql) - # swap the order if we need to - if start_date > end_date: - start_date, end_date = end_date, start_date + except SqlError as sql_error: + raise sql_error except Exception as e: pass diff --git a/opteryx/version.py b/opteryx/version.py index 5fff2558..2ba3a7c4 100644 --- a/opteryx/version.py +++ b/opteryx/version.py @@ -16,4 +16,4 @@ 2) we can import it in setup.py for the same reason """ -__version__ = "0.2.0-beta.6" +__version__ = "0.2.0-beta.7" diff --git a/tests/sql_battery/test_battery_shape.py b/tests/sql_battery/test_battery_shape.py index 765a0754..6ba91c85 100644 --- a/tests/sql_battery/test_battery_shape.py +++ b/tests/sql_battery/test_battery_shape.py @@ -439,9 +439,9 @@ # FRAME HANDLING ("SELECT * FROM tests.data.framed FOR '2021-03-28'", 100000, 1), ("SELECT * FROM tests.data.framed FOR '2021-03-29'", 100000, 1), - ("SELECT * FROM tests.data.framed FOR DATES BETWEEN '2021-03-28' AND '2021-03-29", 200000, 1), - ("SELECT * FROM tests.data.framed FOR DATES BETWEEN '2021-03-29' AND '2021-03-30", 100000, 1), - ("SELECT * FROM tests.data.framed FOR DATES BETWEEN '2021-03-28' AND '2021-03-30", 200000, 1), + ("SELECT * FROM tests.data.framed FOR DATES BETWEEN '2021-03-28' AND '2021-03-29'", 200000, 1), + ("SELECT * FROM tests.data.framed FOR DATES BETWEEN '2021-03-29' AND '2021-03-30'", 100000, 1), + ("SELECT * FROM tests.data.framed FOR DATES BETWEEN '2021-03-28' AND '2021-03-30'", 200000, 1), # DOESN'T WORK WITH LARGE DATASETS (#179) ("SELECT * FROM (SELECT COUNT(*), column_1 FROM FAKE(5000,2) GROUP BY column_1 ORDER BY COUNT(*)) LIMIT 5", 5, 2), # FILTER CREATION FOR 3 OR MORE ANDED PREDICATES FAILS (#182) From 2ca904e3868034696c8abe8a425db827c37a42e2 Mon Sep 17 00:00:00 2001 From: Justin Joyce Date: Mon, 25 Jul 2022 15:46:40 +0100 Subject: [PATCH 2/4] FIX/#312 --- tests/sql_battery/test_expect_to_fail.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/sql_battery/test_expect_to_fail.py b/tests/sql_battery/test_expect_to_fail.py index e81a5f28..6369cff2 100644 --- a/tests/sql_battery/test_expect_to_fail.py +++ b/tests/sql_battery/test_expect_to_fail.py @@ -40,6 +40,13 @@ # JOIN hints aren't supported ("SELECT * FROM $satellites INNER HASH JOIN $planets USING (id)"), + + # Invalid temporal ranges + ("SELECT * FROM t FOR 2022-01-01"), + ("SELECT * FROM t FOR DATES IN 2022"), + ("SELECT * FROM t FOR DATES BETWEEN 2022-01-01 AND TODAY"), + ("SELECT * FROM t FOR DATES BETWEEN today AND yesterday"), + ] # fmt:on From 129d50348e5788b11e8ef59290c3ca4b0471f161 Mon Sep 17 00:00:00 2001 From: Justin Joyce Date: Mon, 25 Jul 2022 15:47:21 +0100 Subject: [PATCH 3/4] BUG/#312 --- tests/sql_battery/test_expect_to_fail.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/sql_battery/test_expect_to_fail.py b/tests/sql_battery/test_expect_to_fail.py index 6369cff2..b7623a6b 100644 --- a/tests/sql_battery/test_expect_to_fail.py +++ b/tests/sql_battery/test_expect_to_fail.py @@ -42,10 +42,10 @@ ("SELECT * FROM $satellites INNER HASH JOIN $planets USING (id)"), # Invalid temporal ranges - ("SELECT * FROM t FOR 2022-01-01"), - ("SELECT * FROM t FOR DATES IN 2022"), - ("SELECT * FROM t FOR DATES BETWEEN 2022-01-01 AND TODAY"), - ("SELECT * FROM t FOR DATES BETWEEN today AND yesterday"), + ("SELECT * FROM $planets FOR 2022-01-01"), + ("SELECT * FROM $planets FOR DATES IN 2022"), + ("SELECT * FROM $planets FOR DATES BETWEEN 2022-01-01 AND TODAY"), + ("SELECT * FROM $planets FOR DATES BETWEEN today AND yesterday"), ] # fmt:on From d8a1031d4d118e497b1de8defefdd03dff1756de Mon Sep 17 00:00:00 2001 From: Justin Joyce Date: Mon, 25 Jul 2022 15:51:16 +0100 Subject: [PATCH 4/4] FIX/#312 --- docs/Release Notes/Change Log.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Release Notes/Change Log.md b/docs/Release Notes/Change Log.md index 71139c56..9e068235 100644 --- a/docs/Release Notes/Change Log.md +++ b/docs/Release Notes/Change Log.md @@ -30,7 +30,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [[#277](https://github.com/mabel-dev/opteryx/issues/277)] Cache errors should be transparent. ([@joocer](https://github.com/joocer)) - [[#285](https://github.com/mabel-dev/opteryx/issues/285)] `DISTINCT` on nulls throws error. ([@joocer](https://github.com/joocer)) - [[#281](https://github.com/mabel-dev/opteryx/issues/281)] `SELECT` on empty aggregates reports missing columns. ([@joocer](https://github.com/joocer)) - +- [[#312](https://github.com/mabel-dev/opteryx/issues/312)] Invalid dates in `FOR` clauses treated as `TODAY`. ([@joocer](https://github.com/joocer)) ### [0.1.0] - 2022-07-02