Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix reading timestamptz before epoch from Parquet #12855

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Expand Up @@ -34,6 +34,7 @@
import static io.trino.spi.type.Timestamps.MICROSECONDS_PER_MILLISECOND;
import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_MICROSECOND;
import static java.lang.Math.floorDiv;
+ import static java.lang.Math.floorMod;
import static java.lang.Math.toIntExact;
import static java.lang.String.format;

Expand Down Expand Up @@ -65,7 +66,7 @@ else if (type == TIMESTAMP_TZ_MILLIS) {
}
else if (type == TIMESTAMP_TZ_MICROS || type == TIMESTAMP_TZ_NANOS) {
long epochMillis = floorDiv(epochMicros, MICROSECONDS_PER_MILLISECOND);
- int picosOfMillis = toIntExact(epochMicros % MICROSECONDS_PER_MILLISECOND) * PICOSECONDS_PER_MICROSECOND;
+ int picosOfMillis = toIntExact(floorMod(epochMicros, MICROSECONDS_PER_MILLISECOND)) * PICOSECONDS_PER_MICROSECOND;
type.writeObject(blockBuilder, LongTimestampWithTimeZone.fromEpochMillisAndFraction(epochMillis, picosOfMillis, UTC_KEY));
}
else if (type == BIGINT) {
Expand Down
Expand Up @@ -395,11 +395,14 @@ private void testSelectOrPartitionedByTimestampWithTimeZone(boolean partitioned)
String instant2La = "TIMESTAMP '2021-10-30 17:30:00.006000 America/Los_Angeles'";
String instant3Utc = "TIMESTAMP '2021-10-31 00:30:00.007000 UTC'";
String instant3La = "TIMESTAMP '2021-10-30 17:30:00.007000 America/Los_Angeles'";
+ // regression test value for https://github.com/trinodb/trino/issues/12852
+ String instant4Utc = "TIMESTAMP '1969-12-01 05:06:07.234567 UTC'";

assertUpdate(format("INSERT INTO %s VALUES %s", tableName, instant1Utc), 1);
assertUpdate(format("INSERT INTO %s VALUES %s", tableName, instant2La /* non-UTC for this one */), 1);
assertUpdate(format("INSERT INTO %s VALUES %s", tableName, instant3Utc), 1);
- assertQuery(format("SELECT COUNT(*) from %s", tableName), "SELECT 3");
+ assertUpdate(format("INSERT INTO %s VALUES %s", tableName, instant4Utc), 1);
+ assertQuery(format("SELECT COUNT(*) from %s", tableName), "SELECT 4");

// =
assertThat(query(format("SELECT * from %s WHERE _timestamptz = %s", tableName, instant1Utc)))
Expand All @@ -414,22 +417,24 @@ private void testSelectOrPartitionedByTimestampWithTimeZone(boolean partitioned)
.matches("VALUES " + instant3Utc);
assertThat(query(format("SELECT * from %s WHERE _timestamptz = %s", tableName, instant3La)))
.matches("VALUES " + instant3Utc);
+ assertThat(query(format("SELECT * from %s WHERE _timestamptz = %s", tableName, instant4Utc)))
+ .matches("VALUES " + instant4Utc);

// <
assertThat(query(format("SELECT * from %s WHERE _timestamptz < %s", tableName, instant2Utc)))
- .matches("VALUES " + instant1Utc);
+ .matches(format("VALUES %s, %s", instant1Utc, instant4Utc));
assertThat(query(format("SELECT * from %s WHERE _timestamptz < %s", tableName, instant2La)))
- .matches("VALUES " + instant1Utc);
+ .matches(format("VALUES %s, %s", instant1Utc, instant4Utc));
assertThat(query(format("SELECT * from %s WHERE _timestamptz < %s", tableName, instant3Utc)))
- .matches(format("VALUES %s, %s", instant1Utc, instant2Utc));
+ .matches(format("VALUES %s, %s, %s", instant1Utc, instant2Utc, instant4Utc));
assertThat(query(format("SELECT * from %s WHERE _timestamptz < %s", tableName, instant3La)))
- .matches(format("VALUES %s, %s", instant1Utc, instant2Utc));
+ .matches(format("VALUES %s, %s, %s", instant1Utc, instant2Utc, instant4Utc));

// <=
assertThat(query(format("SELECT * from %s WHERE _timestamptz <= %s", tableName, instant2Utc)))
- .matches(format("VALUES %s, %s", instant1Utc, instant2Utc));
+ .matches(format("VALUES %s, %s, %s", instant1Utc, instant2Utc, instant4Utc));
assertThat(query(format("SELECT * from %s WHERE _timestamptz <= %s", tableName, instant2La)))
- .matches(format("VALUES %s, %s", instant1Utc, instant2Utc));
+ .matches(format("VALUES %s, %s, %s", instant1Utc, instant2Utc, instant4Utc));

// >
assertThat(query(format("SELECT * from %s WHERE _timestamptz > %s", tableName, instant2Utc)))
Expand Down Expand Up @@ -461,23 +466,27 @@ private void testSelectOrPartitionedByTimestampWithTimeZone(boolean partitioned)

// !=
assertThat(query(format("SELECT * from %s WHERE _timestamptz != %s", tableName, instant1Utc)))
- .matches(format("VALUES %s, %s", instant2Utc, instant3Utc));
+ .matches(format("VALUES %s, %s, %s", instant2Utc, instant3Utc, instant4Utc));
assertThat(query(format("SELECT * from %s WHERE _timestamptz != %s", tableName, instant1La)))
- .matches(format("VALUES %s, %s", instant2Utc, instant3Utc));
+ .matches(format("VALUES %s, %s, %s", instant2Utc, instant3Utc, instant4Utc));
assertThat(query(format("SELECT * from %s WHERE _timestamptz != %s", tableName, instant2Utc)))
- .matches(format("VALUES %s, %s", instant1Utc, instant3Utc));
+ .matches(format("VALUES %s, %s, %s", instant1Utc, instant3Utc, instant4Utc));
assertThat(query(format("SELECT * from %s WHERE _timestamptz != %s", tableName, instant2La)))
- .matches(format("VALUES %s, %s", instant1Utc, instant3Utc));
+ .matches(format("VALUES %s, %s, %s", instant1Utc, instant3Utc, instant4Utc));
+ assertThat(query(format("SELECT * from %s WHERE _timestamptz != %s", tableName, instant4Utc)))
+ .matches(format("VALUES %s, %s, %s", instant1Utc, instant2Utc, instant3Utc));

// IS DISTINCT FROM
assertThat(query(format("SELECT * from %s WHERE _timestamptz IS DISTINCT FROM %s", tableName, instant1Utc)))
- .matches(format("VALUES %s, %s", instant2Utc, instant3Utc));
+ .matches(format("VALUES %s, %s, %s", instant2Utc, instant3Utc, instant4Utc));
assertThat(query(format("SELECT * from %s WHERE _timestamptz IS DISTINCT FROM %s", tableName, instant1La)))
- .matches(format("VALUES %s, %s", instant2Utc, instant3Utc));
+ .matches(format("VALUES %s, %s, %s", instant2Utc, instant3Utc, instant4Utc));
assertThat(query(format("SELECT * from %s WHERE _timestamptz IS DISTINCT FROM %s", tableName, instant2Utc)))
- .matches(format("VALUES %s, %s", instant1Utc, instant3Utc));
+ .matches(format("VALUES %s, %s, %s", instant1Utc, instant3Utc, instant4Utc));
assertThat(query(format("SELECT * from %s WHERE _timestamptz IS DISTINCT FROM %s", tableName, instant2La)))
- .matches(format("VALUES %s, %s", instant1Utc, instant3Utc));
+ .matches(format("VALUES %s, %s, %s", instant1Utc, instant3Utc, instant4Utc));
+ assertThat(query(format("SELECT * from %s WHERE _timestamptz IS DISTINCT FROM %s", tableName, instant4Utc)))
+ .matches(format("VALUES %s, %s, %s", instant1Utc, instant2Utc, instant3Utc));

// IS NOT DISTINCT FROM
assertThat(query(format("SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", tableName, instant1Utc)))
Expand All @@ -492,25 +501,32 @@ private void testSelectOrPartitionedByTimestampWithTimeZone(boolean partitioned)
.matches("VALUES " + instant3Utc);
assertThat(query(format("SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", tableName, instant3La)))
.matches("VALUES " + instant3Utc);
+ assertThat(query(format("SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", tableName, instant4Utc)))
+ .matches("VALUES " + instant4Utc);

if (partitioned) {
assertThat(query(format("SELECT record_count, file_count, partition._timestamptz FROM \"%s$partitions\"", tableName)))
- .matches(format("VALUES (BIGINT '1', BIGINT '1', %s), (BIGINT '1', BIGINT '1', %s), (BIGINT '1', BIGINT '1', %s)", instant1Utc, instant2Utc, instant3Utc));
+ .matches(format(
+ "VALUES (BIGINT '1', BIGINT '1', %s), (BIGINT '1', BIGINT '1', %s), (BIGINT '1', BIGINT '1', %s), (BIGINT '1', BIGINT '1', %s)",
+ instant1Utc,
+ instant2Utc,
+ instant3Utc,
+ instant4Utc));
}
else {
assertThat(query(format("SELECT record_count, file_count, data._timestamptz FROM \"%s$partitions\"", tableName)))
.matches(format(
- "VALUES (BIGINT '3', BIGINT '3', CAST(ROW(%s, %s, 0, NULL) AS row(min timestamp(6) with time zone, max timestamp(6) with time zone, null_count bigint, nan_count bigint)))",
- instant1Utc,
+ "VALUES (BIGINT '4', BIGINT '4', CAST(ROW(%s, %s, 0, NULL) AS row(min timestamp(6) with time zone, max timestamp(6) with time zone, null_count bigint, nan_count bigint)))",
+ format == ORC ? "TIMESTAMP '1969-12-01 05:06:07.234000 UTC'" : instant4Utc,
format == ORC ? "TIMESTAMP '2021-10-31 00:30:00.007999 UTC'" : instant3Utc));
}

// show stats
assertThat(query("SHOW STATS FOR " + tableName))
.skippingTypesCheck()
.matches("VALUES " +
- "('_timestamptz', NULL, NULL, 0e0, NULL, '2021-10-31 00:30:00.005 UTC', '2021-10-31 00:30:00.007 UTC'), " +
- "(NULL, NULL, NULL, NULL, 3e0, NULL, NULL)");
+ "('_timestamptz', NULL, NULL, 0e0, NULL, '1969-12-01 05:06:07.234 UTC', '2021-10-31 00:30:00.007 UTC'), " +
+ "(NULL, NULL, NULL, NULL, 4e0, NULL, NULL)");

if (partitioned) {
// show stats with predicate
Expand Down