Skip to content

Commit

Permalink
cast at logical level (#2631)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Feb 13, 2022
1 parent 6d791bd commit 54eea34
Show file tree
Hide file tree
Showing 12 changed files with 157 additions and 115 deletions.
20 changes: 20 additions & 0 deletions polars/polars-core/src/chunked_array/logical/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,24 @@ impl LogicalType for DateChunked {
/// Returns the element at index `i` as an `AnyValue`.
///
/// The value is read from the wrapped physical array and then converted
/// with `into_date()` so it is reported as a date rather than a raw integer.
fn get_any_value(&self, i: usize) -> AnyValue<'_> {
self.0.get_any_value(i).into_date()
}

/// Casts this date array to `dtype`.
///
/// When the target is a `Datetime` (only with the `dtype-datetime` feature),
/// the physical values are first cast to the target dtype and then multiplied
/// by the number of target time units in one day; the result carries the
/// requested time unit and time zone. Every other target dtype is delegated
/// to the cast of the wrapped physical array.
fn cast(&self, dtype: &DataType) -> Result<Series> {
    use DataType::*;
    match (self.dtype(), dtype) {
        #[cfg(feature = "dtype-datetime")]
        (Date, Datetime(tu, tz)) => {
            // Number of `tu` units that make up a single day.
            let units_per_day = match tu {
                TimeUnit::Nanoseconds => NS_IN_DAY,
                TimeUnit::Microseconds => US_IN_DAY,
                TimeUnit::Milliseconds => MS_IN_DAY,
            };
            let physical = self.0.cast(dtype)?;
            // The series was just cast to a `Datetime` dtype, so this accessor
            // is expected to succeed.
            let day_counts = physical.datetime().unwrap();
            let scaled = day_counts.deref() * units_per_day;
            Ok(scaled.into_datetime(*tu, tz.clone()).into_series())
        }
        _ => self.0.cast(dtype),
    }
}
}
55 changes: 55 additions & 0 deletions polars/polars-core/src/chunked_array/logical/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,59 @@ impl LogicalType for DatetimeChunked {
.get_any_value(i)
.into_datetime(self.time_unit(), self.time_zone())
}

/// Casts this datetime array to `dtype`.
///
/// * `Datetime -> Datetime` with a different time unit rescales the underlying
///   values (multiplying towards a finer unit, dividing towards a coarser one)
///   and attaches the time zone of the *target* dtype.
/// * `Datetime -> Date` (only with the `dtype-date` feature) divides by the
///   number of source units per day and casts the result to `Int32`.
/// * Anything else is delegated to the cast of the wrapped physical array.
///
/// # Errors
///
/// Returns any error produced by the underlying casts.
fn cast(&self, dtype: &DataType) -> Result<Series> {
    use DataType::*;
    match (self.dtype(), dtype) {
        (Datetime(TimeUnit::Milliseconds, _), Datetime(TimeUnit::Nanoseconds, tz)) => {
            Ok((self.0.as_ref() * 1_000_000i64)
                .into_datetime(TimeUnit::Nanoseconds, tz.clone())
                .into_series())
        }
        (Datetime(TimeUnit::Milliseconds, _), Datetime(TimeUnit::Microseconds, tz)) => {
            Ok((self.0.as_ref() * 1_000i64)
                .into_datetime(TimeUnit::Microseconds, tz.clone())
                .into_series())
        }
        (Datetime(TimeUnit::Nanoseconds, _), Datetime(TimeUnit::Milliseconds, tz)) => {
            Ok((self.0.as_ref() / 1_000_000i64)
                .into_datetime(TimeUnit::Milliseconds, tz.clone())
                .into_series())
        }
        (Datetime(TimeUnit::Nanoseconds, _), Datetime(TimeUnit::Microseconds, tz)) => {
            Ok((self.0.as_ref() / 1_000i64)
                .into_datetime(TimeUnit::Microseconds, tz.clone())
                .into_series())
        }
        (Datetime(TimeUnit::Microseconds, _), Datetime(TimeUnit::Milliseconds, tz)) => {
            Ok((self.0.as_ref() / 1_000i64)
                .into_datetime(TimeUnit::Milliseconds, tz.clone())
                .into_series())
        }
        (Datetime(TimeUnit::Microseconds, _), Datetime(TimeUnit::Nanoseconds, tz)) => {
            Ok((self.0.as_ref() * 1_000i64)
                .into_datetime(TimeUnit::Nanoseconds, tz.clone())
                .into_series())
        }
        #[cfg(feature = "dtype-date")]
        (Datetime(tu, _), Date) => {
            // Number of source time units that make up a single day.
            let units_per_day = match tu {
                TimeUnit::Nanoseconds => NS_IN_DAY,
                TimeUnit::Microseconds => US_IN_DAY,
                TimeUnit::Milliseconds => MS_IN_DAY,
            };
            // NOTE(review): the division presumably truncates toward zero, so
            // pre-epoch timestamps may not floor to the previous day — confirm.
            // Propagate a failed narrowing cast instead of panicking.
            Ok((self.0.as_ref() / units_per_day)
                .cast(&Int32)?
                .into_date()
                .into_series())
        }
        _ => self.0.cast(dtype),
    }
}
}
27 changes: 27 additions & 0 deletions polars/polars-core/src/chunked_array/logical/duration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,31 @@ impl LogicalType for DurationChunked {
/// Returns the element at index `i` as an `AnyValue`.
///
/// The value is read from the wrapped physical array and then converted
/// with `into_duration`, using this array's own time unit.
fn get_any_value(&self, i: usize) -> AnyValue<'_> {
self.0.get_any_value(i).into_duration(self.time_unit())
}

/// Casts this duration array to `dtype`.
///
/// A cast between two different duration time units rescales the underlying
/// values (multiplying towards a finer unit, dividing towards a coarser one)
/// and tags the result with the target unit. All six unit pairs are handled,
/// mirroring the datetime cast. Every other target dtype is delegated to the
/// cast of the wrapped physical array.
fn cast(&self, dtype: &DataType) -> Result<Series> {
    use DataType::*;
    match (self.dtype(), dtype) {
        (Duration(TimeUnit::Milliseconds), Duration(TimeUnit::Nanoseconds)) => {
            Ok((self.0.as_ref() * 1_000_000i64)
                .into_duration(TimeUnit::Nanoseconds)
                .into_series())
        }
        (Duration(TimeUnit::Milliseconds), Duration(TimeUnit::Microseconds)) => {
            Ok((self.0.as_ref() * 1_000i64)
                .into_duration(TimeUnit::Microseconds)
                .into_series())
        }
        (Duration(TimeUnit::Nanoseconds), Duration(TimeUnit::Milliseconds)) => {
            Ok((self.0.as_ref() / 1_000_000i64)
                .into_duration(TimeUnit::Milliseconds)
                .into_series())
        }
        (Duration(TimeUnit::Nanoseconds), Duration(TimeUnit::Microseconds)) => {
            Ok((self.0.as_ref() / 1_000i64)
                .into_duration(TimeUnit::Microseconds)
                .into_series())
        }
        // Microsecond sources were previously unhandled and fell through to
        // the physical cast below, which shows no rescaling for the new unit.
        (Duration(TimeUnit::Microseconds), Duration(TimeUnit::Milliseconds)) => {
            Ok((self.0.as_ref() / 1_000i64)
                .into_duration(TimeUnit::Milliseconds)
                .into_series())
        }
        (Duration(TimeUnit::Microseconds), Duration(TimeUnit::Nanoseconds)) => {
            Ok((self.0.as_ref() * 1_000i64)
                .into_duration(TimeUnit::Nanoseconds)
                .into_series())
        }
        _ => self.0.cast(dtype),
    }
}
}
16 changes: 14 additions & 2 deletions polars/polars-core/src/chunked_array/logical/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
#[cfg(feature = "dtype-date")]
mod date;
#[cfg(feature = "dtype-date")]
pub use date::*;
#[cfg(feature = "dtype-datetime")]
mod datetime;
#[cfg(feature = "dtype-datetime")]
pub use datetime::*;
#[cfg(feature = "dtype-duration")]
mod duration;
#[cfg(feature = "dtype-duration")]
pub use duration::*;
#[cfg(feature = "dtype-time")]
mod time;

pub use {date::*, datetime::*, duration::*, time::*};
#[cfg(feature = "dtype-time")]
pub use time::*;

use crate::prelude::*;
use std::marker::PhantomData;
Expand Down Expand Up @@ -52,6 +62,8 @@ pub trait LogicalType {
/// Returns the element at index `_i` as an `AnyValue`.
///
/// # Panics
///
/// This default implementation always panics via `unimplemented!()`;
/// implementors presumably override it (the logical types visible in this
/// change do).
fn get_any_value(&self, _i: usize) -> AnyValue<'_> {
unimplemented!()
}

/// Casts the logical array to `dtype` and returns the result as a `Series`.
///
/// This is a required method: it has no default body, so every implementor
/// must supply its own conversion rules.
fn cast(&self, dtype: &DataType) -> Result<Series>;
}

impl<K: PolarsDataType, T: PolarsDataType> Logical<K, T>
Expand Down
4 changes: 4 additions & 0 deletions polars/polars-core/src/chunked_array/logical/time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,8 @@ impl LogicalType for TimeChunked {
/// Returns the element at index `i` as an `AnyValue`.
///
/// The value is read from the wrapped physical array and then converted
/// with `into_time()` so it is reported as a time rather than a raw integer.
fn get_any_value(&self, i: usize) -> AnyValue<'_> {
self.0.get_any_value(i).into_time()
}

/// Casts this time array to `dtype`.
///
/// No time-specific conversion is applied here: the call is forwarded
/// unchanged to the cast of the wrapped physical array.
fn cast(&self, dtype: &DataType) -> Result<Series> {
self.0.cast(dtype)
}
}
1 change: 1 addition & 0 deletions polars/polars-core/src/chunked_array/temporal/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ pub mod conversion;
mod date;
#[cfg(feature = "dtype-datetime")]
mod datetime;
#[cfg(feature = "dtype-duration")]
mod duration;
#[cfg(feature = "dtype-time")]
mod time;
Expand Down
3 changes: 3 additions & 0 deletions polars/polars-core/src/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ impl Debug for Series {
DataType::Float64 => {
format_array!(limit, f, self.f64().unwrap(), "f64", self.name(), "Series")
}
#[cfg(feature = "dtype-date")]
DataType::Date => format_array!(
limit,
f,
Expand All @@ -245,6 +246,7 @@ impl Debug for Series {
self.name(),
"Series"
),
#[cfg(feature = "dtype-datetime")]
DataType::Datetime(_, _) => {
let dt = format!("{}", self.dtype());
format_array!(
Expand All @@ -256,6 +258,7 @@ impl Debug for Series {
"Series"
)
}
#[cfg(feature = "dtype-duration")]
DataType::Duration(_) => {
let dt = format!("{}", self.dtype());
format_array!(
Expand Down
29 changes: 19 additions & 10 deletions polars/polars-core/src/serde/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,6 @@ impl Serialize for Series {
ca.serialize(serializer)
} else if let Ok(ca) = self.f64() {
ca.serialize(serializer)
} else if let Ok(ca) = self.date() {
ca.serialize(serializer)
} else if self.datetime().is_ok() {
let s = self
.cast(&DataType::Datetime(TimeUnit::Microseconds, None))
.unwrap();
let ca = s.datetime().unwrap();
ca.serialize(serializer)
} else if let Ok(ca) = self.utf8() {
ca.serialize(serializer)
} else if let Ok(ca) = self.bool() {
Expand All @@ -49,8 +41,25 @@ impl Serialize for Series {
} else if let Ok(ca) = self.list() {
ca.serialize(serializer)
} else {
// cast small integers to i32
self.cast(&DataType::Int32).unwrap().serialize(serializer)
match self.dtype() {
#[cfg(feature = "dtype-date")]
DataType::Date => {
let ca = self.date().unwrap();
ca.serialize(serializer)
}
#[cfg(feature = "dtype-datetime")]
DataType::Datetime(_, _) => {
let s = self
.cast(&DataType::Datetime(TimeUnit::Microseconds, None))
.unwrap();
let ca = s.datetime().unwrap();
ca.serialize(serializer)
}
_ => {
// cast small integers to i32
self.cast(&DataType::Int32).unwrap().serialize(serializer)
}
}
}
}
}
Expand Down
21 changes: 3 additions & 18 deletions polars/polars-core/src/series/implementations/dates_time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ macro_rules! impl_dyn_series {
self.0.shrink_to_fit()
}

#[cfg(feature = "dtype-time")]
fn time(&self) -> Result<&TimeChunked> {
if matches!(self.0.dtype(), DataType::Time) {
unsafe { Ok(&*(self as *const dyn SeriesTrait as *const TimeChunked)) }
Expand All @@ -276,6 +277,7 @@ macro_rules! impl_dyn_series {
}
}

#[cfg(feature = "dtype-date")]
fn date(&self) -> Result<&DateChunked> {
if matches!(self.0.dtype(), DataType::Date) {
unsafe { Ok(&*(self as *const dyn SeriesTrait as *const DateChunked)) }
Expand Down Expand Up @@ -398,24 +400,7 @@ macro_rules! impl_dyn_series {
}

fn cast(&self, data_type: &DataType) -> Result<Series> {
use DataType::*;
let ca = match (self.dtype(), data_type) {
#[cfg(feature = "dtype-datetime")]
(Date, Datetime(tu, tz)) => {
let casted = self.0.cast(data_type)?;
let casted = casted.datetime().unwrap();
let conversion = match tu {
TimeUnit::Nanoseconds => NS_IN_DAY,
TimeUnit::Microseconds => US_IN_DAY,
TimeUnit::Milliseconds => MS_IN_DAY,
};
return Ok((casted.deref() * conversion)
.into_datetime(*tu, tz.clone())
.into_series());
}
_ => Cow::Borrowed(self.0.deref()),
};
ca.cast(data_type)
self.0.cast(data_type)
}

fn to_dummies(&self) -> Result<DataFrame> {
Expand Down
60 changes: 1 addition & 59 deletions polars/polars-core/src/series/implementations/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -402,65 +402,7 @@ impl SeriesTrait for SeriesWrap<DatetimeChunked> {
}

fn cast(&self, data_type: &DataType) -> Result<Series> {
use DataType::*;
let ca = match (self.dtype(), data_type) {
(Datetime(TimeUnit::Milliseconds, _), Datetime(TimeUnit::Nanoseconds, tz)) => {
return Ok((self.0.as_ref() * 1_000_000i64)
.into_datetime(TimeUnit::Nanoseconds, tz.clone())
.into_series())
}
(Datetime(TimeUnit::Milliseconds, _), Datetime(TimeUnit::Microseconds, tz)) => {
return Ok((self.0.as_ref() * 1_000i64)
.into_datetime(TimeUnit::Microseconds, tz.clone())
.into_series())
}
(Datetime(TimeUnit::Nanoseconds, _), Datetime(TimeUnit::Milliseconds, tz)) => {
return Ok((self.0.as_ref() / 1_000_000i64)
.into_datetime(TimeUnit::Milliseconds, tz.clone())
.into_series())
}
(Datetime(TimeUnit::Nanoseconds, _), Datetime(TimeUnit::Microseconds, tz)) => {
return Ok((self.0.as_ref() / 1_000i64)
.into_datetime(TimeUnit::Microseconds, tz.clone())
.into_series())
}
(Datetime(TimeUnit::Microseconds, _), Datetime(TimeUnit::Milliseconds, tz)) => {
return Ok((self.0.as_ref() / 1_000i64)
.into_datetime(TimeUnit::Milliseconds, tz.clone())
.into_series())
}
(Datetime(TimeUnit::Microseconds, _), Datetime(TimeUnit::Nanoseconds, tz)) => {
return Ok((self.0.as_ref() * 1_000i64)
.into_datetime(TimeUnit::Nanoseconds, tz.clone())
.into_series())
}
#[cfg(feature = "dtype-date")]
(Datetime(tu, _), Date) => match tu {
TimeUnit::Nanoseconds => {
return Ok((self.0.as_ref() / NS_IN_DAY)
.cast(&Int32)
.unwrap()
.into_date()
.into_series());
}
TimeUnit::Microseconds => {
return Ok((self.0.as_ref() / US_IN_DAY)
.cast(&Int32)
.unwrap()
.into_date()
.into_series());
}
TimeUnit::Milliseconds => {
return Ok((self.0.as_ref() / MS_IN_DAY)
.cast(&Int32)
.unwrap()
.into_date()
.into_series());
}
},
_ => Cow::Borrowed(self.0.deref()),
};
ca.cast(data_type)
self.0.cast(data_type)
}

fn to_dummies(&self) -> Result<DataFrame> {
Expand Down
26 changes: 1 addition & 25 deletions polars/polars-core/src/series/implementations/duration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -387,31 +387,7 @@ impl SeriesTrait for SeriesWrap<DurationChunked> {
}

fn cast(&self, data_type: &DataType) -> Result<Series> {
use DataType::*;
let ca = match (self.dtype(), data_type) {
(Duration(TimeUnit::Milliseconds), Duration(TimeUnit::Nanoseconds)) => {
return Ok((self.0.as_ref() * 1_000_000i64)
.into_duration(TimeUnit::Nanoseconds)
.into_series())
}
(Duration(TimeUnit::Milliseconds), Duration(TimeUnit::Microseconds)) => {
return Ok((self.0.as_ref() * 1_000i64)
.into_duration(TimeUnit::Microseconds)
.into_series())
}
(Duration(TimeUnit::Nanoseconds), Duration(TimeUnit::Milliseconds)) => {
return Ok((self.0.as_ref() / 1_000_000i64)
.into_duration(TimeUnit::Milliseconds)
.into_series())
}
(Duration(TimeUnit::Nanoseconds), Duration(TimeUnit::Microseconds)) => {
return Ok((self.0.as_ref() / 1_000i64)
.into_duration(TimeUnit::Microseconds)
.into_series())
}
_ => Cow::Borrowed(self.0.deref()),
};
ca.cast(data_type)
self.0.cast(data_type)
}

fn to_dummies(&self) -> Result<DataFrame> {
Expand Down

0 comments on commit 54eea34

Please sign in to comment.