diff --git a/src/compute/temporal.rs b/src/compute/temporal.rs
index ae8130ee597..2610d8d8a67 100644
--- a/src/compute/temporal.rs
+++ b/src/compute/temporal.rs
@@ -23,10 +23,140 @@ use crate::array::*;
 use crate::datatypes::*;
 use crate::error::{ArrowError, Result};
 use crate::temporal_conversions::*;
+use crate::types::NativeType;
+use crate::types::NaturalDataType;
 
 use super::arity::unary;
 
-/// Extracts the hours of a given temporal array as an array of integers
+/// Applies `extract` to every value of `array`, after converting the value to a
+/// [`chrono::DateTime`] in `timezone`.
+///
+/// `time_unit` selects which `timestamp_*_to_datetime` conversion interprets the
+/// stored integers. The returned array has logical type `A::DATA_TYPE` for every
+/// `time_unit`.
+fn extract_impl<T, A, F>(
+    array: &PrimitiveArray<i64>,
+    time_unit: TimeUnit,
+    timezone: T,
+    extract: F,
+) -> PrimitiveArray<A>
+where
+    T: chrono::TimeZone,
+    A: NativeType + NaturalDataType,
+    F: Fn(chrono::DateTime<T>) -> A,
+{
+    match time_unit {
+        TimeUnit::Second => {
+            let op = |x| {
+                let datetime = timestamp_s_to_datetime(x);
+                let offset = timezone.offset_from_utc_datetime(&datetime);
+                extract(chrono::DateTime::<T>::from_utc(datetime, offset))
+            };
+            unary(array, op, A::DATA_TYPE)
+        }
+        TimeUnit::Millisecond => {
+            let op = |x| {
+                let datetime = timestamp_ms_to_datetime(x);
+                let offset = timezone.offset_from_utc_datetime(&datetime);
+                extract(chrono::DateTime::<T>::from_utc(datetime, offset))
+            };
+            unary(array, op, A::DATA_TYPE)
+        }
+        TimeUnit::Microsecond => {
+            let op = |x| {
+                let datetime = timestamp_us_to_datetime(x);
+                let offset = timezone.offset_from_utc_datetime(&datetime);
+                extract(chrono::DateTime::<T>::from_utc(datetime, offset))
+            };
+            unary(array, op, A::DATA_TYPE)
+        }
+        TimeUnit::Nanosecond => {
+            let op = |x| {
+                let datetime = timestamp_ns_to_datetime(x);
+                let offset = timezone.offset_from_utc_datetime(&datetime);
+                extract(chrono::DateTime::<T>::from_utc(datetime, offset))
+            };
+            unary(array, op, A::DATA_TYPE)
+        }
+    }
+}
+
+/// Extracts the hours of `array` in the timezone that `parse_offset_tz` yields for
+/// `timezone_str`.
+///
+/// # Errors
+/// Returns [`ArrowError::InvalidArgumentError`] if `timezone_str` cannot be parsed.
+#[cfg(feature = "chrono-tz")]
+#[cfg_attr(docsrs, doc(cfg(feature = "chrono-tz")))]
+fn chrono_tz_hour(
+    array: &PrimitiveArray<i64>,
+    time_unit: TimeUnit,
+    timezone_str: &str,
+) -> Result<PrimitiveArray<u32>> {
+    let timezone = parse_offset_tz(timezone_str);
+    if let Some(timezone) = timezone {
+        Ok(extract_impl(array, time_unit, timezone, |x| x.hour()))
+    } else {
+        Err(ArrowError::InvalidArgumentError(format!(
+            "timezone \"{}\" cannot be parsed",
+            timezone_str
+        )))
+    }
+}
+
+/// Fallback compiled when the `chrono-tz` feature is disabled: always returns
+/// [`ArrowError::InvalidArgumentError`] naming `timezone_str`.
+#[cfg(not(feature = "chrono-tz"))]
+fn chrono_tz_hour(
+    _: &PrimitiveArray<i64>,
+    _: TimeUnit,
+    timezone_str: &str,
+) -> Result<PrimitiveArray<u32>> {
+    Err(ArrowError::InvalidArgumentError(format!(
+        "timezone \"{}\" cannot be parsed (feature chrono-tz is not active)",
+        timezone_str
+    )))
+}
+
+/// Extracts the years of `array` in the timezone that `parse_offset_tz` yields for
+/// `timezone_str`.
+///
+/// # Errors
+/// Returns [`ArrowError::InvalidArgumentError`] if `timezone_str` cannot be parsed.
+#[cfg(feature = "chrono-tz")]
+#[cfg_attr(docsrs, doc(cfg(feature = "chrono-tz")))]
+fn chrono_tz_year(
+    array: &PrimitiveArray<i64>,
+    time_unit: TimeUnit,
+    timezone_str: &str,
+) -> Result<PrimitiveArray<i32>> {
+    let timezone = parse_offset_tz(timezone_str);
+    if let Some(timezone) = timezone {
+        Ok(extract_impl(array, time_unit, timezone, |x| x.year()))
+    } else {
+        Err(ArrowError::InvalidArgumentError(format!(
+            "timezone \"{}\" cannot be parsed",
+            timezone_str
+        )))
+    }
+}
+
+/// Fallback compiled when the `chrono-tz` feature is disabled: always returns
+/// [`ArrowError::InvalidArgumentError`] naming `timezone_str`.
+#[cfg(not(feature = "chrono-tz"))]
+fn chrono_tz_year(
+    _: &PrimitiveArray<i64>,
+    _: TimeUnit,
+    timezone_str: &str,
+) -> Result<PrimitiveArray<i32>> {
+    Err(ArrowError::InvalidArgumentError(format!(
+        "timezone \"{}\" cannot be parsed (feature chrono-tz is not active)",
+        timezone_str
+    )))
+}
+
+/// Extracts the hours of a temporal array as [`PrimitiveArray<u32>`].
+/// Use [`can_hour`] to check if this operation is supported for the target [`DataType`].
pub fn hour(array: &dyn Array) -> Result> { let final_data_type = DataType::UInt32; match array.data_type() { @@ -37,7 +147,7 @@ pub fn hour(array: &dyn Array) -> Result> { .unwrap(); Ok(unary(array, |x| time32s_to_time(x).hour(), final_data_type)) } - DataType::Time32(TimeUnit::Microsecond) => { + DataType::Time32(TimeUnit::Millisecond) => { let array = array .as_any() .downcast_ref::>() @@ -105,6 +215,18 @@ pub fn hour(array: &dyn Array) -> Result> { }; Ok(unary(array, op, final_data_type)) } + DataType::Timestamp(time_unit, Some(timezone_str)) => { + let time_unit = *time_unit; + let timezone = parse_offset(timezone_str); + + let array = array.as_any().downcast_ref().unwrap(); + + if let Ok(timezone) = timezone { + Ok(extract_impl(array, time_unit, timezone, |x| x.hour())) + } else { + chrono_tz_hour(array, time_unit, timezone_str) + } + } dt => Err(ArrowError::NotYetImplemented(format!( "\"hour\" does not support type {:?}", dt @@ -129,16 +251,17 @@ pub fn can_hour(data_type: &DataType) -> bool { matches!( data_type, DataType::Time32(TimeUnit::Second) - | DataType::Time32(TimeUnit::Microsecond) + | DataType::Time32(TimeUnit::Millisecond) | DataType::Time64(TimeUnit::Microsecond) | DataType::Time64(TimeUnit::Nanosecond) | DataType::Date32 | DataType::Date64 - | DataType::Timestamp(_, None) + | DataType::Timestamp(_, _) ) } -/// Extracts the hours of a given temporal array as an array of integers +/// Extracts the years of a temporal array as [`PrimitiveArray`]. +/// Use [`can_year`] to check if this operation is supported for the target [`DataType`]. 
pub fn year(array: &dyn Array) -> Result> { let final_data_type = DataType::Int32; match array.data_type() { @@ -177,6 +300,18 @@ pub fn year(array: &dyn Array) -> Result> { }; Ok(unary(array, op, final_data_type)) } + DataType::Timestamp(time_unit, Some(timezone_str)) => { + let time_unit = *time_unit; + let timezone = parse_offset(timezone_str); + + let array = array.as_any().downcast_ref().unwrap(); + + if let Ok(timezone) = timezone { + Ok(extract_impl(array, time_unit, timezone, |x| x.year())) + } else { + chrono_tz_year(array, time_unit, timezone_str) + } + } dt => Err(ArrowError::NotYetImplemented(format!( "\"year\" does not support type {:?}", dt @@ -200,6 +335,6 @@ pub fn year(array: &dyn Array) -> Result> { pub fn can_year(data_type: &DataType) -> bool { matches!( data_type, - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) ) } diff --git a/tests/it/compute/temporal.rs b/tests/it/compute/temporal.rs index 22a7e202757..160d880a91a 100644 --- a/tests/it/compute/temporal.rs +++ b/tests/it/compute/temporal.rs @@ -41,7 +41,7 @@ fn time64_micro_hour() { } #[test] -fn timestamp_micro_hour() { +fn naive_timestamp_micro_hour() { let array = Int64Array::from(&[Some(37800000000), None]) .to(DataType::Timestamp(TimeUnit::Microsecond, None)); @@ -51,7 +51,7 @@ fn timestamp_micro_hour() { } #[test] -fn timestamp_date64_year() { +fn date64_year() { let array = Int64Array::from(&[Some(1514764800000), None]).to(DataType::Date64); let result = year(&array).unwrap(); @@ -60,7 +60,7 @@ fn timestamp_date64_year() { } #[test] -fn timestamp_date32_year() { +fn naive_timestamp_date32_year() { let array = Int32Array::from(&[Some(15147), None]).to(DataType::Date32); let result = year(&array).unwrap(); @@ -69,7 +69,7 @@ fn timestamp_date32_year() { } #[test] -fn timestamp_micro_year() { +fn naive_timestamp_micro_year() { let array = Int64Array::from(&[Some(1612025847000000), None]) 
.to(DataType::Timestamp(TimeUnit::Microsecond, None)); @@ -78,6 +78,42 @@ fn timestamp_micro_year() { assert_eq!(result, expected); } +#[test] +fn timestamp_micro_hour() { + let array = Int64Array::from(&[Some(1621877130000000), None]).to(DataType::Timestamp( + TimeUnit::Microsecond, + Some("+01:00".to_string()), + )); + + let result = hour(&array).unwrap(); + let expected = UInt32Array::from(&[Some(18), None]); + assert_eq!(result, expected); +} + +#[cfg(feature = "chrono-tz")] +#[test] +fn timestamp_micro_hour_tz() { + let timestamp = 1621877130000000; // Mon May 24 2021 17:25:30 GMT+0000 + let array = Int64Array::from(&[Some(timestamp), None]).to(DataType::Timestamp( + TimeUnit::Microsecond, + Some("GMT".to_string()), + )); + + let result = hour(&array).unwrap(); + let expected = UInt32Array::from(&[Some(17), None]); + assert_eq!(result, expected); + + // (Western European Summer Time in Lisbon) => +1 hour + let array = Int64Array::from(&[Some(timestamp), None]).to(DataType::Timestamp( + TimeUnit::Microsecond, + Some("Europe/Lisbon".to_string()), + )); + + let result = hour(&array).unwrap(); + let expected = UInt32Array::from(&[Some(18), None]); + assert_eq!(result, expected); +} + #[test] fn consistency_hour() { use arrow2::array::new_null_array; @@ -101,6 +137,7 @@ fn consistency_hour() { Timestamp(TimeUnit::Millisecond, None), Timestamp(TimeUnit::Microsecond, None), Timestamp(TimeUnit::Nanosecond, None), + Timestamp(TimeUnit::Nanosecond, Some("+00:00".to_string())), Time64(TimeUnit::Microsecond), Time64(TimeUnit::Nanosecond), Date32, @@ -150,6 +187,7 @@ fn consistency_year() { Timestamp(TimeUnit::Millisecond, None), Timestamp(TimeUnit::Microsecond, None), Timestamp(TimeUnit::Nanosecond, None), + Timestamp(TimeUnit::Nanosecond, Some("+00:00".to_string())), Time64(TimeUnit::Microsecond), Time64(TimeUnit::Nanosecond), Date32,