Skip to content

Commit

Permalink
feat!: Introducing TIMESTAMP_NTZ token and data type (#3386)
Browse files (browse the repository at this point in the history)
* feat!: Introducing TIMESTAMP_NTZ data type

* PR Feedback 1

* Change default generation of TIMESTAMP_NTZ

* Update tests/dialects/test_spark.py

---------

Co-authored-by: Jo <46752250+georgesittas@users.noreply.github.com>
  • Loading branch information
VaggelisD and georgesittas committed May 2, 2024
1 parent: f44cd24 · commit: d1b4f1f
Show file tree
Hide file tree
Showing 11 changed files with 61 additions and 33 deletions.
4 changes: 0 additions & 4 deletions sqlglot/dialects/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,10 +716,7 @@ class Tokenizer(tokens.Tokenizer):
"SQL_DOUBLE": TokenType.DOUBLE,
"SQL_VARCHAR": TokenType.VARCHAR,
"STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
"TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
"TIMESTAMP_NTZ": TokenType.TIMESTAMP,
"TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
"TIMESTAMPNTZ": TokenType.TIMESTAMP,
"TOP": TokenType.TOP,
}

Expand Down Expand Up @@ -848,7 +845,6 @@ class Generator(generator.Generator):
**generator.Generator.TYPE_MAPPING,
exp.DataType.Type.NESTED: "OBJECT",
exp.DataType.Type.STRUCT: "OBJECT",
exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
}

STAR_MAPPING = {
Expand Down
6 changes: 5 additions & 1 deletion sqlglot/dialects/spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sqlglot import exp
from sqlglot.dialects.dialect import rename_func, unit_to_var
from sqlglot.dialects.hive import _build_with_ignore_nulls
from sqlglot.dialects.spark2 import Spark2, temporary_storage_provider
from sqlglot.dialects.spark2 import Spark2, temporary_storage_provider, _build_as_cast
from sqlglot.helper import ensure_list, seq_get
from sqlglot.transforms import (
ctas_with_tmp_tables_to_create_tmp_view,
Expand Down Expand Up @@ -63,6 +63,8 @@ class Parser(Spark2.Parser):
**Spark2.Parser.FUNCTIONS,
"ANY_VALUE": _build_with_ignore_nulls(exp.AnyValue),
"DATEDIFF": _build_datediff,
"TIMESTAMP_LTZ": _build_as_cast("TIMESTAMP_LTZ"),
"TIMESTAMP_NTZ": _build_as_cast("TIMESTAMP_NTZ"),
"TRY_ELEMENT_AT": lambda args: exp.Bracket(
this=seq_get(args, 0), expressions=ensure_list(seq_get(args, 1)), safe=True
),
Expand All @@ -88,6 +90,8 @@ class Generator(Spark2.Generator):
exp.DataType.Type.MONEY: "DECIMAL(15, 4)",
exp.DataType.Type.SMALLMONEY: "DECIMAL(6, 4)",
exp.DataType.Type.UNIQUEIDENTIFIER: "STRING",
exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP_LTZ",
exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP_NTZ",
}

TRANSFORMS = {
Expand Down
2 changes: 2 additions & 0 deletions sqlglot/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3934,6 +3934,7 @@ class Type(AutoName):
TIME = auto()
TIMETZ = auto()
TIMESTAMP = auto()
TIMESTAMPNTZ = auto()
TIMESTAMPLTZ = auto()
TIMESTAMPTZ = auto()
TIMESTAMP_S = auto()
Expand Down Expand Up @@ -4035,6 +4036,7 @@ class Type(AutoName):
Type.DATETIME64,
Type.TIME,
Type.TIMESTAMP,
Type.TIMESTAMPNTZ,
Type.TIMESTAMPLTZ,
Type.TIMESTAMPTZ,
Type.TIMESTAMP_MS,
Expand Down
1 change: 1 addition & 0 deletions sqlglot/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ class Parser(metaclass=_Parser):
TokenType.TIMESTAMP_NS,
TokenType.TIMESTAMPTZ,
TokenType.TIMESTAMPLTZ,
TokenType.TIMESTAMPNTZ,
TokenType.DATETIME,
TokenType.DATETIME64,
TokenType.DATE,
Expand Down
4 changes: 4 additions & 0 deletions sqlglot/tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ class TokenType(AutoName):
TIMESTAMP = auto()
TIMESTAMPTZ = auto()
TIMESTAMPLTZ = auto()
TIMESTAMPNTZ = auto()
TIMESTAMP_S = auto()
TIMESTAMP_MS = auto()
TIMESTAMP_NS = auto()
Expand Down Expand Up @@ -847,6 +848,9 @@ class Tokenizer(metaclass=_Tokenizer):
"TIMESTAMP": TokenType.TIMESTAMP,
"TIMESTAMPTZ": TokenType.TIMESTAMPTZ,
"TIMESTAMPLTZ": TokenType.TIMESTAMPLTZ,
"TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
"TIMESTAMPNTZ": TokenType.TIMESTAMPNTZ,
"TIMESTAMP_NTZ": TokenType.TIMESTAMPNTZ,
"DATE": TokenType.DATE,
"DATETIME": TokenType.DATETIME,
"INT4RANGE": TokenType.INT4RANGE,
Expand Down
4 changes: 2 additions & 2 deletions tests/dialects/test_hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ def test_time(self):
"hive": "DATE_ADD('2020-01-01', 1)",
"presto": "DATE_ADD('DAY', 1, CAST(CAST('2020-01-01' AS TIMESTAMP) AS DATE))",
"redshift": "DATEADD(DAY, 1, '2020-01-01')",
"snowflake": "DATEADD(DAY, 1, CAST(CAST('2020-01-01' AS TIMESTAMPNTZ) AS DATE))",
"snowflake": "DATEADD(DAY, 1, CAST(CAST('2020-01-01' AS TIMESTAMP) AS DATE))",
"spark": "DATE_ADD('2020-01-01', 1)",
"tsql": "DATEADD(DAY, 1, CAST(CAST('2020-01-01' AS DATETIME2) AS DATE))",
},
Expand All @@ -348,7 +348,7 @@ def test_time(self):
"hive": "DATE_ADD('2020-01-01', 1 * -1)",
"presto": "DATE_ADD('DAY', 1 * -1, CAST(CAST('2020-01-01' AS TIMESTAMP) AS DATE))",
"redshift": "DATEADD(DAY, 1 * -1, '2020-01-01')",
"snowflake": "DATEADD(DAY, 1 * -1, CAST(CAST('2020-01-01' AS TIMESTAMPNTZ) AS DATE))",
"snowflake": "DATEADD(DAY, 1 * -1, CAST(CAST('2020-01-01' AS TIMESTAMP) AS DATE))",
"spark": "DATE_ADD('2020-01-01', 1 * -1)",
"tsql": "DATEADD(DAY, 1 * -1, CAST(CAST('2020-01-01' AS DATETIME2) AS DATE))",
},
Expand Down
18 changes: 9 additions & 9 deletions tests/dialects/test_mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,63 +466,63 @@ def test_date_format(self):
"SELECT DATE_FORMAT('2017-06-15', '%Y')",
write={
"mysql": "SELECT DATE_FORMAT('2017-06-15', '%Y')",
"snowflake": "SELECT TO_CHAR(CAST('2017-06-15' AS TIMESTAMPNTZ), 'yyyy')",
"snowflake": "SELECT TO_CHAR(CAST('2017-06-15' AS TIMESTAMP), 'yyyy')",
},
)
self.validate_all(
"SELECT DATE_FORMAT('2017-06-15', '%m')",
write={
"mysql": "SELECT DATE_FORMAT('2017-06-15', '%m')",
"snowflake": "SELECT TO_CHAR(CAST('2017-06-15' AS TIMESTAMPNTZ), 'mm')",
"snowflake": "SELECT TO_CHAR(CAST('2017-06-15' AS TIMESTAMP), 'mm')",
},
)
self.validate_all(
"SELECT DATE_FORMAT('2017-06-15', '%d')",
write={
"mysql": "SELECT DATE_FORMAT('2017-06-15', '%d')",
"snowflake": "SELECT TO_CHAR(CAST('2017-06-15' AS TIMESTAMPNTZ), 'DD')",
"snowflake": "SELECT TO_CHAR(CAST('2017-06-15' AS TIMESTAMP), 'DD')",
},
)
self.validate_all(
"SELECT DATE_FORMAT('2017-06-15', '%Y-%m-%d')",
write={
"mysql": "SELECT DATE_FORMAT('2017-06-15', '%Y-%m-%d')",
"snowflake": "SELECT TO_CHAR(CAST('2017-06-15' AS TIMESTAMPNTZ), 'yyyy-mm-DD')",
"snowflake": "SELECT TO_CHAR(CAST('2017-06-15' AS TIMESTAMP), 'yyyy-mm-DD')",
},
)
self.validate_all(
"SELECT DATE_FORMAT('2017-06-15 22:23:34', '%H')",
write={
"mysql": "SELECT DATE_FORMAT('2017-06-15 22:23:34', '%H')",
"snowflake": "SELECT TO_CHAR(CAST('2017-06-15 22:23:34' AS TIMESTAMPNTZ), 'hh24')",
"snowflake": "SELECT TO_CHAR(CAST('2017-06-15 22:23:34' AS TIMESTAMP), 'hh24')",
},
)
self.validate_all(
"SELECT DATE_FORMAT('2017-06-15', '%w')",
write={
"mysql": "SELECT DATE_FORMAT('2017-06-15', '%w')",
"snowflake": "SELECT TO_CHAR(CAST('2017-06-15' AS TIMESTAMPNTZ), 'dy')",
"snowflake": "SELECT TO_CHAR(CAST('2017-06-15' AS TIMESTAMP), 'dy')",
},
)
self.validate_all(
"SELECT DATE_FORMAT('2009-10-04 22:23:00', '%W %M %Y')",
write={
"mysql": "SELECT DATE_FORMAT('2009-10-04 22:23:00', '%W %M %Y')",
"snowflake": "SELECT TO_CHAR(CAST('2009-10-04 22:23:00' AS TIMESTAMPNTZ), 'DY mmmm yyyy')",
"snowflake": "SELECT TO_CHAR(CAST('2009-10-04 22:23:00' AS TIMESTAMP), 'DY mmmm yyyy')",
},
)
self.validate_all(
"SELECT DATE_FORMAT('2007-10-04 22:23:00', '%H:%i:%s')",
write={
"mysql": "SELECT DATE_FORMAT('2007-10-04 22:23:00', '%T')",
"snowflake": "SELECT TO_CHAR(CAST('2007-10-04 22:23:00' AS TIMESTAMPNTZ), 'hh24:mi:ss')",
"snowflake": "SELECT TO_CHAR(CAST('2007-10-04 22:23:00' AS TIMESTAMP), 'hh24:mi:ss')",
},
)
self.validate_all(
"SELECT DATE_FORMAT('1900-10-04 22:23:00', '%d %y %a %d %m %b')",
write={
"mysql": "SELECT DATE_FORMAT('1900-10-04 22:23:00', '%d %y %W %d %m %b')",
"snowflake": "SELECT TO_CHAR(CAST('1900-10-04 22:23:00' AS TIMESTAMPNTZ), 'DD yy DY DD mm mon')",
"snowflake": "SELECT TO_CHAR(CAST('1900-10-04 22:23:00' AS TIMESTAMP), 'DD yy DY DD mm mon')",
},
)

Expand Down
2 changes: 1 addition & 1 deletion tests/dialects/test_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ def test_postgres(self):
write={
"postgres": "SELECT EXTRACT(minute FROM CAST('2023-01-04 04:05:06.789' AS TIMESTAMP))",
"redshift": "SELECT EXTRACT(minute FROM CAST('2023-01-04 04:05:06.789' AS TIMESTAMP))",
"snowflake": "SELECT DATE_PART(minute, CAST('2023-01-04 04:05:06.789' AS TIMESTAMPNTZ))",
"snowflake": "SELECT DATE_PART(minute, CAST('2023-01-04 04:05:06.789' AS TIMESTAMP))",
},
)
self.validate_all(
Expand Down
4 changes: 2 additions & 2 deletions tests/dialects/test_redshift.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def test_redshift(self):
write={
"postgres": "SELECT EXTRACT(minute FROM CAST('2023-01-04 04:05:06.789' AS TIMESTAMP))",
"redshift": "SELECT EXTRACT(minute FROM CAST('2023-01-04 04:05:06.789' AS TIMESTAMP))",
"snowflake": "SELECT DATE_PART(minute, CAST('2023-01-04 04:05:06.789' AS TIMESTAMPNTZ))",
"snowflake": "SELECT DATE_PART(minute, CAST('2023-01-04 04:05:06.789' AS TIMESTAMP))",
},
)
self.validate_all(
Expand Down Expand Up @@ -271,7 +271,7 @@ def test_redshift(self):
"postgres": "SELECT CAST('2008-02-28' AS TIMESTAMP) + INTERVAL '18 MONTH'",
"presto": "SELECT DATE_ADD('MONTH', 18, CAST('2008-02-28' AS TIMESTAMP))",
"redshift": "SELECT DATEADD(MONTH, 18, '2008-02-28')",
"snowflake": "SELECT DATEADD(MONTH, 18, CAST('2008-02-28' AS TIMESTAMPNTZ))",
"snowflake": "SELECT DATEADD(MONTH, 18, CAST('2008-02-28' AS TIMESTAMP))",
"tsql": "SELECT DATEADD(MONTH, 18, CAST('2008-02-28' AS DATETIME2))",
},
)
Expand Down
28 changes: 16 additions & 12 deletions tests/dialects/test_snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ def test_snowflake(self):
"SELECT DATE_PART('year', TIMESTAMP '2020-01-01')",
write={
"hive": "SELECT EXTRACT(year FROM CAST('2020-01-01' AS TIMESTAMP))",
"snowflake": "SELECT DATE_PART('year', CAST('2020-01-01' AS TIMESTAMPNTZ))",
"snowflake": "SELECT DATE_PART('year', CAST('2020-01-01' AS TIMESTAMP))",
"spark": "SELECT EXTRACT(year FROM CAST('2020-01-01' AS TIMESTAMP))",
},
)
Expand Down Expand Up @@ -597,7 +597,7 @@ def test_snowflake(self):
self.validate_all(
"SELECT DAYOFWEEK('2016-01-02T23:39:20.123-07:00'::TIMESTAMP)",
write={
"snowflake": "SELECT DAYOFWEEK(CAST('2016-01-02T23:39:20.123-07:00' AS TIMESTAMPNTZ))",
"snowflake": "SELECT DAYOFWEEK(CAST('2016-01-02T23:39:20.123-07:00' AS TIMESTAMP))",
},
)
self.validate_all(
Expand Down Expand Up @@ -695,7 +695,7 @@ def test_snowflake(self):
"SELECT TO_TIMESTAMP('2013-04-05 01:02:03')",
write={
"bigquery": "SELECT CAST('2013-04-05 01:02:03' AS DATETIME)",
"snowflake": "SELECT CAST('2013-04-05 01:02:03' AS TIMESTAMPNTZ)",
"snowflake": "SELECT CAST('2013-04-05 01:02:03' AS TIMESTAMP)",
"spark": "SELECT CAST('2013-04-05 01:02:03' AS TIMESTAMP)",
},
)
Expand Down Expand Up @@ -957,12 +957,16 @@ def test_timestamps(self):
self.validate_identity("SELECT CAST('12:00:00' AS TIME)")
self.validate_identity("SELECT DATE_PART(month, a)")

self.validate_all(
"SELECT CAST(a AS TIMESTAMP)",
write={
"snowflake": "SELECT CAST(a AS TIMESTAMPNTZ)",
},
)
for data_type in (
"TIMESTAMP",
"TIMESTAMPLTZ",
"TIMESTAMPNTZ",
):
self.validate_identity(f"CAST(a AS {data_type})")

self.validate_identity("CAST(a AS TIMESTAMP_NTZ)", "CAST(a AS TIMESTAMPNTZ)")
self.validate_identity("CAST(a AS TIMESTAMP_LTZ)", "CAST(a AS TIMESTAMPLTZ)")

self.validate_all(
"SELECT a::TIMESTAMP_LTZ(9)",
write={
Expand Down Expand Up @@ -1002,14 +1006,14 @@ def test_timestamps(self):
self.validate_all(
"SELECT DATE_PART(epoch_second, foo) as ddate from table_name",
write={
"snowflake": "SELECT EXTRACT(epoch_second FROM CAST(foo AS TIMESTAMPNTZ)) AS ddate FROM table_name",
"snowflake": "SELECT EXTRACT(epoch_second FROM CAST(foo AS TIMESTAMP)) AS ddate FROM table_name",
"presto": "SELECT TO_UNIXTIME(CAST(foo AS TIMESTAMP)) AS ddate FROM table_name",
},
)
self.validate_all(
"SELECT DATE_PART(epoch_milliseconds, foo) as ddate from table_name",
write={
"snowflake": "SELECT EXTRACT(epoch_second FROM CAST(foo AS TIMESTAMPNTZ)) * 1000 AS ddate FROM table_name",
"snowflake": "SELECT EXTRACT(epoch_second FROM CAST(foo AS TIMESTAMP)) * 1000 AS ddate FROM table_name",
"presto": "SELECT TO_UNIXTIME(CAST(foo AS TIMESTAMP)) * 1000 AS ddate FROM table_name",
},
)
Expand Down Expand Up @@ -1140,7 +1144,7 @@ def test_historical_data(self):
)
self.validate_identity(
"SELECT * FROM my_table AT (TIMESTAMP => 'Fri, 01 May 2015 16:20:00 -0700'::timestamp)",
"SELECT * FROM my_table AT (TIMESTAMP => CAST('Fri, 01 May 2015 16:20:00 -0700' AS TIMESTAMPNTZ))",
"SELECT * FROM my_table AT (TIMESTAMP => CAST('Fri, 01 May 2015 16:20:00 -0700' AS TIMESTAMP))",
)
self.validate_identity(
"SELECT * FROM my_table AT(TIMESTAMP => 'Fri, 01 May 2015 16:20:00 -0700'::timestamp_tz)",
Expand Down
21 changes: 19 additions & 2 deletions tests/dialects/test_spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ def test_spark(self):
"postgres": "SELECT CAST('2016-08-31' AS TIMESTAMP) AT TIME ZONE 'Asia/Seoul' AT TIME ZONE 'UTC'",
"presto": "SELECT WITH_TIMEZONE(CAST('2016-08-31' AS TIMESTAMP), 'Asia/Seoul') AT TIME ZONE 'UTC'",
"redshift": "SELECT CAST('2016-08-31' AS TIMESTAMP) AT TIME ZONE 'Asia/Seoul' AT TIME ZONE 'UTC'",
"snowflake": "SELECT CONVERT_TIMEZONE('Asia/Seoul', 'UTC', CAST('2016-08-31' AS TIMESTAMPNTZ))",
"snowflake": "SELECT CONVERT_TIMEZONE('Asia/Seoul', 'UTC', CAST('2016-08-31' AS TIMESTAMP))",
"spark": "SELECT TO_UTC_TIMESTAMP(CAST('2016-08-31' AS TIMESTAMP), 'Asia/Seoul')",
},
)
Expand Down Expand Up @@ -523,14 +523,31 @@ def test_spark(self):
},
)

for data_type in ("BOOLEAN", "DATE", "DOUBLE", "FLOAT", "INT", "TIMESTAMP"):
for data_type in (
"BOOLEAN",
"DATE",
"DOUBLE",
"FLOAT",
"INT",
"TIMESTAMP",
):
self.validate_all(
f"{data_type}(x)",
write={
"": f"CAST(x AS {data_type})",
"spark": f"CAST(x AS {data_type})",
},
)

for ts_suffix in ("NTZ", "LTZ"):
self.validate_all(
f"TIMESTAMP_{ts_suffix}(x)",
write={
"": f"CAST(x AS TIMESTAMP{ts_suffix})",
"spark": f"CAST(x AS TIMESTAMP_{ts_suffix})",
},
)

self.validate_all(
"STRING(x)",
write={
Expand Down

0 comments on commit d1b4f1f

Please sign in to comment.