Skip to content

Commit

Permalink
feat!: Smaller integer data types for datetime components (#12070)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Dec 1, 2023
1 parent b9a2531 commit 97db209
Show file tree
Hide file tree
Showing 13 changed files with 228 additions and 202 deletions.
98 changes: 55 additions & 43 deletions crates/polars-arrow/src/compute/temporal.rs
Expand Up @@ -27,42 +27,46 @@ use crate::temporal_conversions::*;
use crate::types::NativeType;

// Create and implement a trait that converts chrono's `Weekday`
// type into `u32`
trait U32Weekday: Datelike {
fn u32_weekday(&self) -> u32 {
self.weekday().number_from_monday()
// type into `i8`
trait Int8Weekday: Datelike {
fn i8_weekday(&self) -> i8 {
self.weekday().number_from_monday().try_into().unwrap()
}
}

impl U32Weekday for chrono::NaiveDateTime {}
impl<T: chrono::TimeZone> U32Weekday for chrono::DateTime<T> {}
impl Int8Weekday for chrono::NaiveDateTime {}
impl<T: chrono::TimeZone> Int8Weekday for chrono::DateTime<T> {}

// Create and implement a trait that converts chrono's `IsoWeek`
// type into `u32`
trait U32IsoWeek: Datelike {
fn u32_iso_week(&self) -> u32 {
self.iso_week().week()
// type into `i8`
trait Int8IsoWeek: Datelike {
fn i8_iso_week(&self) -> i8 {
self.iso_week().week().try_into().unwrap()
}
}

impl U32IsoWeek for chrono::NaiveDateTime {}
impl<T: chrono::TimeZone> U32IsoWeek for chrono::DateTime<T> {}
impl Int8IsoWeek for chrono::NaiveDateTime {}
impl<T: chrono::TimeZone> Int8IsoWeek for chrono::DateTime<T> {}

// Macro to avoid repetition in functions that apply
// `chrono::Datelike` methods on Arrays
macro_rules! date_like {
($extract:ident, $array:ident, $data_type:path) => {
match $array.data_type().to_logical_type() {
ArrowDataType::Date32 | ArrowDataType::Date64 | ArrowDataType::Timestamp(_, None) => {
date_variants($array, $data_type, |x| x.$extract())
date_variants($array, $data_type, |x| x.$extract().try_into().unwrap())
},
ArrowDataType::Timestamp(time_unit, Some(timezone_str)) => {
let array = $array.as_any().downcast_ref().unwrap();

if let Ok(timezone) = parse_offset(timezone_str) {
Ok(extract_impl(array, *time_unit, timezone, |x| x.$extract()))
Ok(extract_impl(array, *time_unit, timezone, |x| {
x.$extract().try_into().unwrap()
}))
} else {
chrono_tz(array, *time_unit, timezone_str, |x| x.$extract())
chrono_tz(array, *time_unit, timezone_str, |x| {
x.$extract().try_into().unwrap()
})
}
},
_ => unimplemented!(),
Expand All @@ -76,32 +80,32 @@ pub fn year(array: &dyn Array) -> PolarsResult<PrimitiveArray<i32>> {
date_like!(year, array, ArrowDataType::Int32)
}

/// Extracts the months of a temporal array as [`PrimitiveArray<u32>`].
/// Extracts the months of a temporal array as [`PrimitiveArray<i8>`].
/// Value ranges from 1 to 12.
/// Use [`can_month`] to check if this operation is supported for the target [`ArrowDataType`].
pub fn month(array: &dyn Array) -> PolarsResult<PrimitiveArray<u32>> {
date_like!(month, array, ArrowDataType::UInt32)
pub fn month(array: &dyn Array) -> PolarsResult<PrimitiveArray<i8>> {
date_like!(month, array, ArrowDataType::Int8)
}

/// Extracts the days of a temporal array as [`PrimitiveArray<u32>`].
/// Extracts the days of a temporal array as [`PrimitiveArray<i8>`].
/// Value ranges from 1 to 31 (the last day depends on the month).
/// Use [`can_day`] to check if this operation is supported for the target [`ArrowDataType`].
pub fn day(array: &dyn Array) -> PolarsResult<PrimitiveArray<u32>> {
date_like!(day, array, ArrowDataType::UInt32)
pub fn day(array: &dyn Array) -> PolarsResult<PrimitiveArray<i8>> {
date_like!(day, array, ArrowDataType::Int8)
}

/// Extracts weekday of a temporal array as [`PrimitiveArray<u32>`].
/// Extracts weekday of a temporal array as [`PrimitiveArray<i8>`].
/// Monday is 1, Tuesday is 2, ..., Sunday is 7.
/// Use [`can_weekday`] to check if this operation is supported for the target [`ArrowDataType`]
pub fn weekday(array: &dyn Array) -> PolarsResult<PrimitiveArray<u32>> {
date_like!(u32_weekday, array, ArrowDataType::UInt32)
pub fn weekday(array: &dyn Array) -> PolarsResult<PrimitiveArray<i8>> {
date_like!(i8_weekday, array, ArrowDataType::Int8)
}

/// Extracts ISO week of a temporal array as [`PrimitiveArray<u32>`]
/// Extracts ISO week of a temporal array as [`PrimitiveArray<i8>`]
/// Value ranges from 1 to 53 (Last week depends on the year).
/// Use [`can_iso_week`] to check if this operation is supported for the target [`ArrowDataType`]
pub fn iso_week(array: &dyn Array) -> PolarsResult<PrimitiveArray<u32>> {
date_like!(u32_iso_week, array, ArrowDataType::UInt32)
pub fn iso_week(array: &dyn Array) -> PolarsResult<PrimitiveArray<i8>> {
date_like!(i8_iso_week, array, ArrowDataType::Int8)
}

// Macro to avoid repetition in functions that apply
Expand All @@ -110,50 +114,58 @@ macro_rules! time_like {
($extract:ident, $array:ident, $data_type:path) => {
match $array.data_type().to_logical_type() {
ArrowDataType::Date32 | ArrowDataType::Date64 | ArrowDataType::Timestamp(_, None) => {
date_variants($array, $data_type, |x| x.$extract())
date_variants($array, $data_type, |x| x.$extract().try_into().unwrap())
},
ArrowDataType::Time32(_) | ArrowDataType::Time64(_) => {
time_variants($array, ArrowDataType::UInt32, |x| x.$extract())
time_variants($array, ArrowDataType::UInt32, |x| {
x.$extract().try_into().unwrap()
})
},
ArrowDataType::Timestamp(time_unit, Some(timezone_str)) => {
let array = $array.as_any().downcast_ref().unwrap();

if let Ok(timezone) = parse_offset(timezone_str) {
Ok(extract_impl(array, *time_unit, timezone, |x| x.$extract()))
Ok(extract_impl(array, *time_unit, timezone, |x| {
x.$extract().try_into().unwrap()
}))
} else {
chrono_tz(array, *time_unit, timezone_str, |x| x.$extract())
chrono_tz(array, *time_unit, timezone_str, |x| {
x.$extract().try_into().unwrap()
})
}
},
_ => unimplemented!(),
}
};
}

/// Extracts the hours of a temporal array as [`PrimitiveArray<u32>`].
/// Extracts the hours of a temporal array as [`PrimitiveArray<i8>`].
/// Value ranges from 0 to 23.
/// Use [`can_hour`] to check if this operation is supported for the target [`ArrowDataType`].
pub fn hour(array: &dyn Array) -> PolarsResult<PrimitiveArray<u32>> {
time_like!(hour, array, ArrowDataType::UInt32)
pub fn hour(array: &dyn Array) -> PolarsResult<PrimitiveArray<i8>> {
time_like!(hour, array, ArrowDataType::Int8)
}

/// Extracts the minutes of a temporal array as [`PrimitiveArray<u32>`].
/// Extracts the minutes of a temporal array as [`PrimitiveArray<i8>`].
/// Value ranges from 0 to 59.
/// Use [`can_minute`] to check if this operation is supported for the target [`ArrowDataType`].
pub fn minute(array: &dyn Array) -> PolarsResult<PrimitiveArray<u32>> {
time_like!(minute, array, ArrowDataType::UInt32)
pub fn minute(array: &dyn Array) -> PolarsResult<PrimitiveArray<i8>> {
time_like!(minute, array, ArrowDataType::Int8)
}

/// Extracts the seconds of a temporal array as [`PrimitiveArray<u32>`].
/// Extracts the seconds of a temporal array as [`PrimitiveArray<i8>`].
/// Value ranges from 0 to 59.
/// Use [`can_second`] to check if this operation is supported for the target [`ArrowDataType`].
pub fn second(array: &dyn Array) -> PolarsResult<PrimitiveArray<u32>> {
time_like!(second, array, ArrowDataType::UInt32)
pub fn second(array: &dyn Array) -> PolarsResult<PrimitiveArray<i8>> {
time_like!(second, array, ArrowDataType::Int8)
}

/// Extracts the nanoseconds of a temporal array as [`PrimitiveArray<u32>`].
/// Extracts the nanoseconds of a temporal array as [`PrimitiveArray<i32>`].
/// Value ranges from 0 to 1_999_999_999.
/// The range from 1_000_000_000 to 1_999_999_999 represents the leap second.
/// Use [`can_nanosecond`] to check if this operation is supported for the target [`ArrowDataType`].
pub fn nanosecond(array: &dyn Array) -> PolarsResult<PrimitiveArray<u32>> {
time_like!(nanosecond, array, ArrowDataType::UInt32)
pub fn nanosecond(array: &dyn Array) -> PolarsResult<PrimitiveArray<i32>> {
time_like!(nanosecond, array, ArrowDataType::Int32)
}

fn date_variants<F, O>(
Expand Down
10 changes: 9 additions & 1 deletion crates/polars-lazy/Cargo.toml
Expand Up @@ -49,7 +49,15 @@ cloud_write = ["cloud"]
ipc = ["polars-io/ipc", "polars-plan/ipc", "polars-pipe?/ipc"]
json = ["polars-io/json", "polars-plan/json", "polars-json", "polars-pipe/json"]
csv = ["polars-io/csv", "polars-plan/csv", "polars-pipe?/csv"]
temporal = ["dtype-datetime", "dtype-date", "dtype-time", "dtype-duration", "polars-plan/temporal"]
temporal = [
"dtype-datetime",
"dtype-date",
"dtype-time",
"dtype-i8",
"dtype-i16",
"dtype-duration",
"polars-plan/temporal",
]
# debugging purposes
fmt = ["polars-core/fmt", "polars-plan/fmt"]
strings = ["polars-plan/strings"]
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/Cargo.toml
Expand Up @@ -59,7 +59,7 @@ cloud = ["async", "polars-io/cloud"]
ipc = ["polars-io/ipc"]
json = ["polars-io/json", "polars-json"]
csv = ["polars-io/csv"]
temporal = ["polars-core/temporal", "dtype-date", "dtype-datetime", "dtype-time"]
temporal = ["polars-core/temporal", "dtype-date", "dtype-datetime", "dtype-time", "dtype-i8", "dtype-i16"]
# debugging purposes
fmt = ["polars-core/fmt"]
strings = ["polars-core/strings", "polars-ops/strings"]
Expand Down
7 changes: 5 additions & 2 deletions crates/polars-plan/src/dsl/function_expr/datetime.rs
Expand Up @@ -60,8 +60,11 @@ impl TemporalFunction {
use TemporalFunction::*;
match self {
Year | IsoYear => mapper.with_dtype(DataType::Int32),
Month | Quarter | Week | WeekDay | Day | OrdinalDay | Hour | Minute | Millisecond
| Microsecond | Nanosecond | Second => mapper.with_dtype(DataType::UInt32),
OrdinalDay => mapper.with_dtype(DataType::Int16),
Month | Quarter | Week | WeekDay | Day | Hour | Minute | Second => {
mapper.with_dtype(DataType::Int8)
},
Millisecond | Microsecond | Nanosecond => mapper.with_dtype(DataType::Int32),
ToString(_) => mapper.with_dtype(DataType::Utf8),
WithTimeUnit(_) => mapper.with_same_dtype(),
CastTimeUnit(tu) => mapper.try_map_dtype(|dt| match dt {
Expand Down
22 changes: 11 additions & 11 deletions crates/polars-time/src/chunkedarray/date.rs
Expand Up @@ -35,7 +35,7 @@ pub trait DateMethods: AsDate {

/// Extract quarter from underlying NaiveDate representation.
/// Quarters range from 1 to 4.
fn quarter(&self) -> UInt32Chunked {
fn quarter(&self) -> Int8Chunked {
let months = self.month();
months_to_quarters(months)
}
Expand All @@ -44,40 +44,40 @@ pub trait DateMethods: AsDate {
/// Returns the month number starting from 1.
///
/// The return value ranges from 1 to 12.
fn month(&self) -> UInt32Chunked {
fn month(&self) -> Int8Chunked {
let ca = self.as_date();
ca.apply_kernel_cast::<UInt32Type>(&date_to_month)
ca.apply_kernel_cast::<Int8Type>(&date_to_month)
}

/// Extract ISO weekday from underlying NaiveDate representation.
/// Returns the weekday number, where Monday = 1 and Sunday = 7.
fn weekday(&self) -> UInt32Chunked {
fn weekday(&self) -> Int8Chunked {
let ca = self.as_date();
ca.apply_kernel_cast::<UInt32Type>(&date_to_iso_weekday)
ca.apply_kernel_cast::<Int8Type>(&date_to_iso_weekday)
}

/// Returns the ISO week number starting from 1.
/// The return value ranges from 1 to 53. (The last week of year differs by years.)
fn week(&self) -> UInt32Chunked {
fn week(&self) -> Int8Chunked {
let ca = self.as_date();
ca.apply_kernel_cast::<UInt32Type>(&date_to_iso_week)
ca.apply_kernel_cast::<Int8Type>(&date_to_iso_week)
}

/// Extract day from underlying NaiveDate representation.
/// Returns the day of month starting from 1.
///
/// The return value ranges from 1 to 31. (The last day of month differs by months.)
fn day(&self) -> UInt32Chunked {
fn day(&self) -> Int8Chunked {
let ca = self.as_date();
ca.apply_kernel_cast::<UInt32Type>(&date_to_day)
ca.apply_kernel_cast::<Int8Type>(&date_to_day)
}

/// Returns the day of year starting from 1.
///
/// The return value ranges from 1 to 366. (The last day of year differs by years.)
fn ordinal(&self) -> UInt32Chunked {
fn ordinal(&self) -> Int16Chunked {
let ca = self.as_date();
ca.apply_kernel_cast::<UInt32Type>(&date_to_ordinal)
ca.apply_kernel_cast::<Int16Type>(&date_to_ordinal)
}

fn parse_from_str_slice(name: &str, v: &[&str], fmt: &str) -> DateChunked;
Expand Down
22 changes: 11 additions & 11 deletions crates/polars-time/src/chunkedarray/datetime.rs
Expand Up @@ -60,7 +60,7 @@ pub trait DatetimeMethods: AsDatetime {

/// Extract quarter from underlying NaiveDateTime representation.
/// Quarters range from 1 to 4.
fn quarter(&self) -> UInt32Chunked {
fn quarter(&self) -> Int8Chunked {
let months = self.month();
months_to_quarters(months)
}
Expand All @@ -69,66 +69,66 @@ pub trait DatetimeMethods: AsDatetime {
/// Returns the month number starting from 1.
///
/// The return value ranges from 1 to 12.
fn month(&self) -> UInt32Chunked {
fn month(&self) -> Int8Chunked {
cast_and_apply(self.as_datetime(), temporal::month)
}

/// Extract ISO weekday from underlying NaiveDateTime representation.
/// Returns the weekday number, where Monday = 1 and Sunday = 7.
fn weekday(&self) -> UInt32Chunked {
fn weekday(&self) -> Int8Chunked {
cast_and_apply(self.as_datetime(), temporal::weekday)
}

/// Returns the ISO week number starting from 1.
/// The return value ranges from 1 to 53. (The last week of year differs by years.)
fn week(&self) -> UInt32Chunked {
fn week(&self) -> Int8Chunked {
cast_and_apply(self.as_datetime(), temporal::iso_week)
}

/// Extract day from underlying NaiveDateTime representation.
/// Returns the day of month starting from 1.
///
/// The return value ranges from 1 to 31. (The last day of month differs by months.)
fn day(&self) -> UInt32Chunked {
fn day(&self) -> Int8Chunked {
cast_and_apply(self.as_datetime(), temporal::day)
}

/// Extract hour from underlying NaiveDateTime representation.
/// Returns the hour number from 0 to 23.
fn hour(&self) -> UInt32Chunked {
fn hour(&self) -> Int8Chunked {
cast_and_apply(self.as_datetime(), temporal::hour)
}

/// Extract minute from underlying NaiveDateTime representation.
/// Returns the minute number from 0 to 59.
fn minute(&self) -> UInt32Chunked {
fn minute(&self) -> Int8Chunked {
cast_and_apply(self.as_datetime(), temporal::minute)
}

/// Extract second from underlying NaiveDateTime representation.
/// Returns the second number from 0 to 59.
fn second(&self) -> UInt32Chunked {
fn second(&self) -> Int8Chunked {
cast_and_apply(self.as_datetime(), temporal::second)
}

/// Extract nanosecond from underlying NaiveDateTime representation.
/// Returns the number of nanoseconds since the whole non-leap second.
/// The range from 1,000,000,000 to 1,999,999,999 represents the leap second.
fn nanosecond(&self) -> UInt32Chunked {
fn nanosecond(&self) -> Int32Chunked {
cast_and_apply(self.as_datetime(), temporal::nanosecond)
}

/// Returns the day of year starting from 1.
///
/// The return value ranges from 1 to 366. (The last day of year differs by years.)
fn ordinal(&self) -> UInt32Chunked {
fn ordinal(&self) -> Int16Chunked {
let ca = self.as_datetime();
let f = match ca.time_unit() {
TimeUnit::Nanoseconds => datetime_to_ordinal_ns,
TimeUnit::Microseconds => datetime_to_ordinal_us,
TimeUnit::Milliseconds => datetime_to_ordinal_ms,
};
ca.apply_kernel_cast::<UInt32Type>(&f)
ca.apply_kernel_cast::<Int16Type>(&f)
}

fn parse_from_str_slice(name: &str, v: &[&str], fmt: &str, tu: TimeUnit) -> DatetimeChunked {
Expand Down

0 comments on commit 97db209

Please sign in to comment.