Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added support for extract to timestamps with TZ. (#412)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Sep 16, 2021
1 parent b1edb5c commit 06c45f8
Show file tree
Hide file tree
Showing 2 changed files with 183 additions and 10 deletions.
147 changes: 141 additions & 6 deletions src/compute/temporal.rs
Expand Up @@ -23,10 +23,120 @@ use crate::array::*;
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use crate::temporal_conversions::*;
use crate::types::NativeType;
use crate::types::NaturalDataType;

use super::arity::unary;

/// Applies `extract` to every value of a timestamp array, after converting the value
/// into a timezone-aware [`chrono::DateTime`].
///
/// Each `i64` in `array` is interpreted in the given `time_unit` and turned into a
/// naive datetime by the matching `timestamp_*_to_datetime` helper. That datetime is
/// treated as UTC: `timezone` is asked for the offset in effect at that instant, and
/// the resulting `DateTime<T>` is handed to `extract`.
///
/// The returned array is always tagged with `A::DATA_TYPE`, whatever the time unit, so
/// the logical type matches the native type produced by `extract` (e.g. `u32` for
/// hours, `i32` for years).
fn extract_impl<T, A, F>(
    array: &PrimitiveArray<i64>,
    time_unit: TimeUnit,
    timezone: T,
    extract: F,
) -> PrimitiveArray<A>
where
    T: chrono::TimeZone,
    A: NativeType + NaturalDataType,
    F: Fn(chrono::DateTime<T>) -> A,
{
    // The arms differ only in the conversion helper. They are kept as separate closures
    // so that each helper is called directly inside the per-element loop.
    match time_unit {
        TimeUnit::Second => {
            let op = |x| {
                let datetime = timestamp_s_to_datetime(x);
                let offset = timezone.offset_from_utc_datetime(&datetime);
                extract(chrono::DateTime::<T>::from_utc(datetime, offset))
            };
            // Must be `A::DATA_TYPE` (not a fixed `UInt32`): `A` is `i32` when
            // extracting years.
            unary(array, op, A::DATA_TYPE)
        }
        TimeUnit::Millisecond => {
            let op = |x| {
                let datetime = timestamp_ms_to_datetime(x);
                let offset = timezone.offset_from_utc_datetime(&datetime);
                extract(chrono::DateTime::<T>::from_utc(datetime, offset))
            };
            unary(array, op, A::DATA_TYPE)
        }
        TimeUnit::Microsecond => {
            let op = |x| {
                let datetime = timestamp_us_to_datetime(x);
                let offset = timezone.offset_from_utc_datetime(&datetime);
                extract(chrono::DateTime::<T>::from_utc(datetime, offset))
            };
            unary(array, op, A::DATA_TYPE)
        }
        TimeUnit::Nanosecond => {
            let op = |x| {
                let datetime = timestamp_ns_to_datetime(x);
                let offset = timezone.offset_from_utc_datetime(&datetime);
                extract(chrono::DateTime::<T>::from_utc(datetime, offset))
            };
            unary(array, op, A::DATA_TYPE)
        }
    }
}

#[cfg(feature = "chrono-tz")]
#[cfg_attr(docsrs, doc(cfg(feature = "chrono-tz")))]
/// Extracts the hour of each timestamp in `array`, using the timezone obtained by
/// passing `timezone_str` to `parse_offset_tz`.
///
/// # Errors
/// Returns [`ArrowError::InvalidArgumentError`] when `parse_offset_tz` yields `None`
/// for `timezone_str`.
fn chrono_tz_hour(
    array: &PrimitiveArray<i64>,
    time_unit: TimeUnit,
    timezone_str: &str,
) -> Result<PrimitiveArray<u32>> {
    parse_offset_tz(timezone_str)
        .map(|tz| extract_impl(array, time_unit, tz, |datetime| datetime.hour()))
        .ok_or_else(|| {
            ArrowError::InvalidArgumentError(format!(
                "timezone \"{}\" cannot be parsed",
                timezone_str
            ))
        })
}

#[cfg(not(feature = "chrono-tz"))]
/// Stand-in for the hour extraction used when the `chrono-tz` feature is disabled.
///
/// # Errors
/// Always returns [`ArrowError::InvalidArgumentError`] naming `timezone_str` and
/// stating that the feature is not active; the array and time unit are ignored.
fn chrono_tz_hour(
    _array: &PrimitiveArray<i64>,
    _time_unit: TimeUnit,
    timezone_str: &str,
) -> Result<PrimitiveArray<u32>> {
    let message = format!(
        "timezone \"{}\" cannot be parsed (feature chrono-tz is not active)",
        timezone_str
    );
    Err(ArrowError::InvalidArgumentError(message))
}

#[cfg(feature = "chrono-tz")]
#[cfg_attr(docsrs, doc(cfg(feature = "chrono-tz")))]
/// Extracts the year of each timestamp in `array`, using the timezone obtained by
/// passing `timezone_str` to `parse_offset_tz`.
///
/// # Errors
/// Returns [`ArrowError::InvalidArgumentError`] when `parse_offset_tz` yields `None`
/// for `timezone_str`.
fn chrono_tz_year(
    array: &PrimitiveArray<i64>,
    time_unit: TimeUnit,
    timezone_str: &str,
) -> Result<PrimitiveArray<i32>> {
    parse_offset_tz(timezone_str)
        .map(|tz| extract_impl(array, time_unit, tz, |datetime| datetime.year()))
        .ok_or_else(|| {
            ArrowError::InvalidArgumentError(format!(
                "timezone \"{}\" cannot be parsed",
                timezone_str
            ))
        })
}

#[cfg(not(feature = "chrono-tz"))]
/// Stand-in for the year extraction used when the `chrono-tz` feature is disabled.
///
/// # Errors
/// Always returns [`ArrowError::InvalidArgumentError`] naming `timezone_str` and
/// stating that the feature is not active; the array and time unit are ignored.
fn chrono_tz_year(
    _array: &PrimitiveArray<i64>,
    _time_unit: TimeUnit,
    timezone_str: &str,
) -> Result<PrimitiveArray<i32>> {
    let message = format!(
        "timezone \"{}\" cannot be parsed (feature chrono-tz is not active)",
        timezone_str
    );
    Err(ArrowError::InvalidArgumentError(message))
}

/// Extracts the hours of a temporal array as [`PrimitiveArray<u32>`].
/// Use [`can_hour`] to check if this operation is supported for the target [`DataType`].
pub fn hour(array: &dyn Array) -> Result<PrimitiveArray<u32>> {
let final_data_type = DataType::UInt32;
match array.data_type() {
Expand All @@ -37,7 +147,7 @@ pub fn hour(array: &dyn Array) -> Result<PrimitiveArray<u32>> {
.unwrap();
Ok(unary(array, |x| time32s_to_time(x).hour(), final_data_type))
}
DataType::Time32(TimeUnit::Microsecond) => {
DataType::Time32(TimeUnit::Millisecond) => {
let array = array
.as_any()
.downcast_ref::<PrimitiveArray<i32>>()
Expand Down Expand Up @@ -105,6 +215,18 @@ pub fn hour(array: &dyn Array) -> Result<PrimitiveArray<u32>> {
};
Ok(unary(array, op, final_data_type))
}
DataType::Timestamp(time_unit, Some(timezone_str)) => {
let time_unit = *time_unit;
let timezone = parse_offset(timezone_str);

let array = array.as_any().downcast_ref().unwrap();

if let Ok(timezone) = timezone {
Ok(extract_impl(array, time_unit, timezone, |x| x.hour()))
} else {
chrono_tz_hour(array, time_unit, timezone_str)
}
}
dt => Err(ArrowError::NotYetImplemented(format!(
"\"hour\" does not support type {:?}",
dt
Expand All @@ -129,16 +251,17 @@ pub fn can_hour(data_type: &DataType) -> bool {
matches!(
data_type,
DataType::Time32(TimeUnit::Second)
| DataType::Time32(TimeUnit::Microsecond)
| DataType::Time32(TimeUnit::Millisecond)
| DataType::Time64(TimeUnit::Microsecond)
| DataType::Time64(TimeUnit::Nanosecond)
| DataType::Date32
| DataType::Date64
| DataType::Timestamp(_, None)
| DataType::Timestamp(_, _)
)
}

/// Extracts the hours of a given temporal array as an array of integers
/// Extracts the years of a temporal array as [`PrimitiveArray<i32>`].
/// Use [`can_year`] to check if this operation is supported for the target [`DataType`].
pub fn year(array: &dyn Array) -> Result<PrimitiveArray<i32>> {
let final_data_type = DataType::Int32;
match array.data_type() {
Expand Down Expand Up @@ -177,6 +300,18 @@ pub fn year(array: &dyn Array) -> Result<PrimitiveArray<i32>> {
};
Ok(unary(array, op, final_data_type))
}
DataType::Timestamp(time_unit, Some(timezone_str)) => {
let time_unit = *time_unit;
let timezone = parse_offset(timezone_str);

let array = array.as_any().downcast_ref().unwrap();

if let Ok(timezone) = timezone {
Ok(extract_impl(array, time_unit, timezone, |x| x.year()))
} else {
chrono_tz_year(array, time_unit, timezone_str)
}
}
dt => Err(ArrowError::NotYetImplemented(format!(
"\"year\" does not support type {:?}",
dt
Expand All @@ -200,6 +335,6 @@ pub fn year(array: &dyn Array) -> Result<PrimitiveArray<i32>> {
pub fn can_year(data_type: &DataType) -> bool {
matches!(
data_type,
DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None)
DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _)
)
}
46 changes: 42 additions & 4 deletions tests/it/compute/temporal.rs
Expand Up @@ -41,7 +41,7 @@ fn time64_micro_hour() {
}

#[test]
fn timestamp_micro_hour() {
fn naive_timestamp_micro_hour() {
let array = Int64Array::from(&[Some(37800000000), None])
.to(DataType::Timestamp(TimeUnit::Microsecond, None));

Expand All @@ -51,7 +51,7 @@ fn timestamp_micro_hour() {
}

#[test]
fn timestamp_date64_year() {
fn date64_year() {
let array = Int64Array::from(&[Some(1514764800000), None]).to(DataType::Date64);

let result = year(&array).unwrap();
Expand All @@ -60,7 +60,7 @@ fn timestamp_date64_year() {
}

#[test]
fn timestamp_date32_year() {
fn naive_timestamp_date32_year() {
let array = Int32Array::from(&[Some(15147), None]).to(DataType::Date32);

let result = year(&array).unwrap();
Expand All @@ -69,7 +69,7 @@ fn timestamp_date32_year() {
}

#[test]
fn timestamp_micro_year() {
fn naive_timestamp_micro_year() {
let array = Int64Array::from(&[Some(1612025847000000), None])
.to(DataType::Timestamp(TimeUnit::Microsecond, None));

Expand All @@ -78,6 +78,42 @@ fn timestamp_micro_year() {
assert_eq!(result, expected);
}

#[test]
fn timestamp_micro_hour() {
    // Microsecond timestamp carrying a fixed "+01:00" offset as its timezone.
    let data_type = DataType::Timestamp(TimeUnit::Microsecond, Some("+01:00".to_string()));
    let array = Int64Array::from(&[Some(1621877130000000), None]).to(data_type);

    // The null slot must stay null; the valid slot must report hour 18.
    let expected = UInt32Array::from(&[Some(18), None]);
    assert_eq!(hour(&array).unwrap(), expected);
}

#[cfg(feature = "chrono-tz")]
#[test]
fn timestamp_micro_hour_tz() {
    let timestamp = 1621877130000000; // Mon May 24 2021 17:25:30 GMT+0000

    // Builds a two-slot (value, null) microsecond timestamp array tagged with the
    // named timezone and extracts its hours.
    let hours_in = |tz: &str| {
        let array = Int64Array::from(&[Some(timestamp), None]).to(DataType::Timestamp(
            TimeUnit::Microsecond,
            Some(tz.to_string()),
        ));
        hour(&array).unwrap()
    };

    assert_eq!(hours_in("GMT"), UInt32Array::from(&[Some(17), None]));

    // (Western European Summer Time in Lisbon) => +1 hour
    assert_eq!(
        hours_in("Europe/Lisbon"),
        UInt32Array::from(&[Some(18), None])
    );
}

#[test]
fn consistency_hour() {
use arrow2::array::new_null_array;
Expand All @@ -101,6 +137,7 @@ fn consistency_hour() {
Timestamp(TimeUnit::Millisecond, None),
Timestamp(TimeUnit::Microsecond, None),
Timestamp(TimeUnit::Nanosecond, None),
Timestamp(TimeUnit::Nanosecond, Some("+00:00".to_string())),
Time64(TimeUnit::Microsecond),
Time64(TimeUnit::Nanosecond),
Date32,
Expand Down Expand Up @@ -150,6 +187,7 @@ fn consistency_year() {
Timestamp(TimeUnit::Millisecond, None),
Timestamp(TimeUnit::Microsecond, None),
Timestamp(TimeUnit::Nanosecond, None),
Timestamp(TimeUnit::Nanosecond, Some("+00:00".to_string())),
Time64(TimeUnit::Microsecond),
Time64(TimeUnit::Nanosecond),
Date32,
Expand Down

0 comments on commit 06c45f8

Please sign in to comment.