Skip to content

Commit

Permalink
HIVE-27513: Iceberg: Fetch task returns wrong results for Timestamp w…
Browse files Browse the repository at this point in the history
…ith local time zone datatype for Iceberg tables (Sourabh Badhya, reviewed by Ayush Saxena, Denys Kuzmenko)

Closes apache#4498
  • Loading branch information
SourabhBadhya authored and tarak271 committed Dec 19, 2023
1 parent fad3aae commit 3bf6395
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
package org.apache.iceberg.mr.hive.serde.objectinspector;

import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import org.apache.hadoop.hive.common.type.TimestampTZ;
import org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampLocalTZObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;


Expand Down Expand Up @@ -58,7 +58,7 @@ public TimestampTZ getPrimitiveJavaObject(Object o) {
return null;
}
OffsetDateTime odt = (OffsetDateTime) o;
ZonedDateTime zdt = odt.atZoneSameInstant(ZoneOffset.UTC);
ZonedDateTime zdt = odt.atZoneSameInstant(((TimestampLocalTZTypeInfo) typeInfo).getTimeZone());
return new TimestampTZ(zdt);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.util.TimeZone;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.Schema;
import org.apache.iceberg.common.DynFields;
Expand Down Expand Up @@ -101,6 +102,7 @@ public static void afterClass() throws Exception {
@Before
public void before() throws IOException {
TimeZone.setDefault(TimeZone.getTimeZone(timezoneString));
TypeInfoFactory.timestampLocalTZTypeInfo.setTimeZone(TimeZone.getTimeZone(timezoneString).toZoneId());

// Magic to clean cached date format and local timezone for Hive where the default timezone is used/stored in the
// cached object
Expand Down Expand Up @@ -192,4 +194,25 @@ public void testTimestampQueryWithTimeZone() throws IOException {
Assert.assertEquals("2019-02-22 09:44:54.2 " + timezoneString, result.get(0)[0]);
}
}

/**
 * Stores one timestamp-with-zone value (2019-02-22 09:44:54.1 at offset +00) in a Parquet-backed table
 * and checks how {@code SELECT *} renders it for the timezone under test.
 *
 * <p>An expected value is only defined for America/New_York, Asia/Kolkata and GMT; for any other
 * {@code timezoneString} only the row count is verified.
 */
@Test
public void testFetchTaskWithTimestampWithLocalTimeZone() throws IOException {
  Schema timestampSchema = new Schema(optional(1, "d_ts", Types.TimestampType.withZone()));

  // The single stored value, expressed as a wall-clock time at offset +00.
  LocalDateTime wallClockAtUtc = LocalDateTime.of(2019, 2, 22, 9, 44, 54, 100000000);
  OffsetDateTime storedValue = OffsetDateTime.of(wallClockAtUtc, ZoneOffset.of("+00"));
  List<Record> records = TestHelper.RecordsBuilder.newInstance(timestampSchema)
      .add(storedValue)
      .build();

  testTables.createTable(shell, "ts_test_tz", timestampSchema, FileFormat.PARQUET, records);

  List<Object[]> rows = shell.executeStatement("SELECT * FROM ts_test_tz");
  Assert.assertEquals(1, rows.size());

  // Expected rendering of the stored value in each known zone (trailing space precedes the zone id).
  String expectedPrefix;
  switch (timezoneString) {
    case "America/New_York":
      expectedPrefix = "2019-02-22 04:44:54.1 ";
      break;
    case "Asia/Kolkata":
      expectedPrefix = "2019-02-22 15:14:54.1 ";
      break;
    case "GMT":
      expectedPrefix = "2019-02-22 09:44:54.1 ";
      break;
    default:
      // No expectation is defined for other zones; the value check is skipped.
      expectedPrefix = null;
      break;
  }

  if (expectedPrefix != null) {
    Assert.assertEquals(expectedPrefix + timezoneString, rows.get(0)[0]);
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.time.OffsetDateTime;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import org.apache.hadoop.hive.common.type.TimestampTZ;
import org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
Expand Down Expand Up @@ -53,9 +54,11 @@ public void testIcebergTimestampLocalTZObjectInspector() {

LocalDateTime dateTimeAtUTC = LocalDateTime.of(2020, 12, 10, 15, 55, 20, 30000);
OffsetDateTime offsetDateTime = OffsetDateTime.of(dateTimeAtUTC.plusHours(4), ZoneOffset.ofHours(4));
ZonedDateTime zdt = offsetDateTime.atZoneSameInstant(TypeInfoFactory.timestampLocalTZTypeInfo.getTimeZone());
TimestampTZ ts = new TimestampTZ(dateTimeAtUTC.atZone(ZoneId.of("UTC")));

Assert.assertEquals(ts, oi.getPrimitiveJavaObject(offsetDateTime));
Assert.assertEquals(zdt, oi.getPrimitiveJavaObject(offsetDateTime).getZonedDateTime());
Assert.assertEquals(new TimestampLocalTZWritable(ts), oi.getPrimitiveWritableObject(offsetDateTime));

// try with another offset as well
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
create table ice_ts_4 (id int, ts timestamp with local time zone) stored by iceberg stored as parquet;
insert into ice_ts_4 values (1, cast('2023-07-20 00:00:00' as timestamp with local time zone));

set hive.fetch.task.conversion=none;
select * from ice_ts_4;

set hive.fetch.task.conversion=more;
select * from ice_ts_4;
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
PREHOOK: query: create table ice_ts_4 (id int, ts timestamp with local time zone) stored by iceberg stored as parquet
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@ice_ts_4
POSTHOOK: query: create table ice_ts_4 (id int, ts timestamp with local time zone) stored by iceberg stored as parquet
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@ice_ts_4
PREHOOK: query: insert into ice_ts_4 values (1, cast('2023-07-20 00:00:00' as timestamp with local time zone))
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@ice_ts_4
POSTHOOK: query: insert into ice_ts_4 values (1, cast('2023-07-20 00:00:00' as timestamp with local time zone))
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@ice_ts_4
PREHOOK: query: select * from ice_ts_4
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_ts_4
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select * from ice_ts_4
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_ts_4
POSTHOOK: Output: hdfs://### HDFS PATH ###
1 2023-07-20 00:00:00.0 US/Pacific
PREHOOK: query: select * from ice_ts_4
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_ts_4
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select * from ice_ts_4
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_ts_4
POSTHOOK: Output: hdfs://### HDFS PATH ###
1 2023-07-20 00:00:00.0 US/Pacific

0 comments on commit 3bf6395

Please sign in to comment.