Skip to content

Commit

Permalink
feat[rust, python]: cast timezone (#4976)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Sep 25, 2022
1 parent 3511e40 commit 47baabd
Show file tree
Hide file tree
Showing 16 changed files with 157 additions and 1 deletion.
2 changes: 1 addition & 1 deletion polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ to_dummies = ["polars-ops/to_dummies"]
bigidx = ["polars-core/bigidx", "polars-lazy/bigidx"]
list_to_struct = ["polars-ops/list_to_struct", "polars-lazy/list_to_struct"]
describe = ["polars-core/describe"]
timezones = ["polars-core/timezones"]
timezones = ["polars-core/timezones", "polars-lazy/timezones"]
string_justify = ["polars-lazy/string_justify", "polars-ops/string_justify"]
arg_where = ["polars-lazy/arg_where"]
search_sorted = ["polars-lazy/search_sorted"]
Expand Down
1 change: 1 addition & 0 deletions polars/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ object = ["polars-core/object"]
date_offset = []
trigonometry = []
sign = []
timezones = ["polars-time/timezones", "polars-core/timezones"]

true_div = []

Expand Down
10 changes: 10 additions & 0 deletions polars/polars-lazy/src/dsl/dt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,17 @@ impl DateLikeNameSpace {
/// Shift this `Date/Datetime` expression by the given [`Duration`] offset.
///
/// Leap years and differing month lengths are taken into account.
#[cfg(feature = "date_offset")]
#[cfg_attr(docsrs, doc(cfg(feature = "date_offset")))]
pub fn offset_by(self, by: Duration) -> Expr {
    // Build the function node first, then attach it to the wrapped expression.
    let function = FunctionExpr::DateOffset(by);
    self.0.map_private(function)
}

/// Cast the time zone of this `Datetime` expression to `tz`.
///
/// Wraps the expression in a `TemporalFunction::CastTimezone(tz)` function node.
#[cfg(feature = "timezones")]
#[cfg_attr(docsrs, doc(cfg(feature = "timezones")))]
pub fn cast_time_zone(self, tz: TimeZone) -> Expr {
    let function = FunctionExpr::TemporalExpr(TemporalFunction::CastTimezone(tz));
    self.0.map_private(function)
}
}
9 changes: 9 additions & 0 deletions polars/polars-lazy/src/dsl/function_expr/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ pub enum TemporalFunction {
Microsecond,
Nanosecond,
TimeStamp(TimeUnit),
#[cfg(feature = "timezones")]
CastTimezone(TimeZone),
}

impl Display for TemporalFunction {
Expand All @@ -42,6 +44,8 @@ impl Display for TemporalFunction {
Microsecond => "microsecond",
Nanosecond => "nanosecond",
TimeStamp(tu) => return write!(f, "dt.timestamp({})", tu),
#[cfg(feature = "timezones")]
CastTimezone(_) => "cast_timezone",
};
write!(f, "dt.{}", s)
}
Expand Down Expand Up @@ -92,3 +96,8 @@ pub(super) fn nanosecond(s: &Series) -> PolarsResult<Series> {
/// Compute the timestamp of `s` in time unit `tu` and return it as a [`Series`].
///
/// # Errors
/// Propagates any error returned by `Series::timestamp`.
pub(super) fn timestamp(s: &Series, tu: TimeUnit) -> PolarsResult<Series> {
    let stamps = s.timestamp(tu)?;
    Ok(stamps.into_series())
}
/// Cast the time zone of the datetime series `s` to `tz`.
///
/// # Errors
/// Propagates the error from `s.datetime()` and from the underlying
/// `cast_timezone` call on the datetime array.
#[cfg(feature = "timezones")]
pub(super) fn cast_timezone(s: &Series, tz: &str) -> PolarsResult<Series> {
    let converted = s.datetime()?.cast_timezone(tz)?;
    Ok(converted.into_series())
}
2 changes: 2 additions & 0 deletions polars/polars-lazy/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,8 @@ impl From<TemporalFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
Microsecond => map!(datetime::microsecond),
Nanosecond => map!(datetime::nanosecond),
TimeStamp(tu) => map!(datetime::timestamp, tu),
#[cfg(feature = "timezones")]
CastTimezone(tz) => map!(datetime::cast_timezone, &tz),
}
}
}
20 changes: 20 additions & 0 deletions polars/polars-lazy/src/dsl/function_expr/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ impl FunctionExpr {
Ok(Field::new(fields[0].name(), dtype))
};

// map a single dtype
#[cfg(feature = "timezones")]
let try_map_dtype = |func: &dyn Fn(&DataType) -> PolarsResult<DataType>| {
let dtype = func(fields[0].data_type())?;
let out: PolarsResult<_> = Ok(Field::new(fields[0].name(), dtype));
out
};

// map all dtypes
#[cfg(feature = "list")]
let map_dtypes = |func: &dyn Fn(&[&DataType]) -> DataType| {
Expand Down Expand Up @@ -117,6 +125,18 @@ impl FunctionExpr {
Month | Quarter | Week | WeekDay | Day | OrdinalDay | Hour | Minute
| Millisecond | Microsecond | Nanosecond | Second => DataType::UInt32,
TimeStamp(_) => DataType::Int64,
#[cfg(feature = "timezones")]
CastTimezone(tz) => {
return try_map_dtype(&|dt| {
if let DataType::Datetime(tu, _) = dt {
Ok(DataType::Datetime(*tu, Some(tz.clone())))
} else {
Err(PolarsError::SchemaMisMatch(
format!("expected Datetime got {:?}", dt).into(),
))
}
})
}
};
with_dtype(dtype)
}
Expand Down
2 changes: 2 additions & 0 deletions polars/polars-time/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ description = "Time related code for the polars dataframe library"

[dependencies]
chrono = "0.4"
chrono-tz = { version = "0.6", optional = true }
lexical = { version = "6", default-features = false, features = ["std", "parse-floats", "parse-integers"] }
polars-arrow = { version = "0.24.1", path = "../polars-arrow", features = ["compute", "temporal"] }
polars-core = { version = "0.24.1", path = "../polars-core", default-features = false, features = ["private", "dtype-datetime", "dtype-duration", "dtype-time", "dtype-date"] }
Expand All @@ -24,6 +25,7 @@ dtype-duration = ["polars-core/dtype-duration", "polars-core/temporal"]
rolling_window = ["polars-core/rolling_window", "dtype-duration"]
private = []
fmt = ["polars-core/fmt"]
timezones = ["chrono-tz"]

test = [
"dtype-date",
Expand Down
21 changes: 21 additions & 0 deletions polars/polars-time/src/chunkedarray/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,27 @@ pub trait DatetimeMethods: AsDatetime {
)
.into_datetime(tu, None)
}

/// Cast a time-zone-aware datetime array from its current time zone to `tz`.
///
/// The returned array keeps the time unit of the input and is tagged with `tz`.
///
/// # Errors
/// Returns a `ComputeError` when
/// * the array has no time zone set (naive datetime), or
/// * the array's current time zone or `tz` cannot be parsed as a `chrono_tz::Tz`.
#[cfg(feature = "timezones")]
fn cast_timezone(&self, tz: &str) -> PolarsResult<DatetimeChunked> {
    use chrono_tz::Tz;
    let ca = self.as_datetime();
    let time_unit = ca.time_unit();

    // A naive datetime has no source zone to convert from.
    let from = match ca.time_zone() {
        Some(from) => from,
        None => {
            return Err(PolarsError::ComputeError(
                "Cannot cast Naive Datetime. First set a timezone".into(),
            ))
        }
    };

    // Report the zone that actually failed to parse: the source zone here ...
    let from_tz: Tz = from.parse().map_err(|_| {
        PolarsError::ComputeError(format!("Could not parse timezone: '{}'", from).into())
    })?;
    // ... and the target zone here.
    let to_tz: Tz = tz.parse().map_err(|_| {
        PolarsError::ComputeError(format!("Could not parse timezone: '{}'", tz).into())
    })?;

    let out = ca.apply_kernel(&|arr| kernels::cast_timezone(arr, time_unit, from_tz, to_tz));
    Ok(out.into_datetime(time_unit, Some(tz.to_string())))
}
}

pub trait AsDatetime {
Expand Down
43 changes: 43 additions & 0 deletions polars/polars-time/src/chunkedarray/kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,3 +225,46 @@ to_temporal_unit!(
i64,
ArrowDataType::Int32
);

/// Element-wise time zone cast kernel for an `i64` timestamp array.
///
/// Each value is read as a timestamp in unit `tu`, interpreted as a local
/// datetime in `from`, moved to `to`, and written back as a timestamp in the
/// same unit.
///
/// # Panics
/// Panics when `unwrap()` on the `from_local_datetime` result fails.
/// NOTE(review): presumably this happens for local times that are ambiguous or
/// non-existent in `from` (e.g. around DST transitions) — confirm against
/// chrono's `LocalResult`.
///
/// NOTE(review): `with_timezone` presumably preserves the instant, in which
/// case `to` may not influence the produced timestamps — confirm.
#[cfg(all(feature = "dtype-datetime", feature = "timezones"))]
pub(crate) fn cast_timezone(
    arr: &PrimitiveArray<i64>,
    tu: TimeUnit,
    from: chrono_tz::Tz,
    to: chrono_tz::Tz,
) -> ArrayRef {
    use chrono::TimeZone;

    // Shared by all time units: localize in `from`, then switch to `to`.
    let relocate = |ndt: chrono::NaiveDateTime| {
        from.from_local_datetime(&ndt).unwrap().with_timezone(&to)
    };

    match tu {
        TimeUnit::Milliseconds => Box::new(unary(
            arr,
            |value| relocate(timestamp_ms_to_datetime(value)).timestamp_millis(),
            ArrowDataType::Int64,
        )),
        TimeUnit::Microseconds => Box::new(unary(
            arr,
            |value| relocate(timestamp_us_to_datetime(value)).timestamp_micros(),
            ArrowDataType::Int64,
        )),
        TimeUnit::Nanoseconds => Box::new(unary(
            arr,
            |value| relocate(timestamp_ns_to_datetime(value)).timestamp_nanos(),
            ArrowDataType::Int64,
        )),
    }
}
1 change: 1 addition & 0 deletions py-polars/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions py-polars/docs/source/reference/expression.rst
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ The following methods are available under the `expr.dt` attribute.
:toctree: api/

ExprDateTimeNameSpace.cast_time_unit
ExprDateTimeNameSpace.cast_time_zone
ExprDateTimeNameSpace.day
ExprDateTimeNameSpace.days
ExprDateTimeNameSpace.epoch
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ The following methods are available under the `Series.dt` attribute.
:toctree: api/

DateTimeNameSpace.cast_time_unit
DateTimeNameSpace.cast_time_zone
DateTimeNameSpace.day
DateTimeNameSpace.days
DateTimeNameSpace.epoch
Expand Down
15 changes: 15 additions & 0 deletions py-polars/polars/internals/expr/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -946,6 +946,21 @@ def with_time_zone(self, tz: str | None) -> pli.Expr:
"""
return pli.wrap_expr(self._pyexpr.dt_with_time_zone(tz))

def cast_time_zone(self, tz: str) -> pli.Expr:
    """
    Cast time zone for a Series of type Datetime.

    Different from ``with_time_zone``, this will also modify
    the underlying timestamp.

    Parameters
    ----------
    tz
        Time zone for the `Datetime` Series.

    """
    return pli.wrap_expr(self._pyexpr.dt_cast_time_zone(tz))

def days(self) -> pli.Expr:
"""
Extract the days from a Duration type.
Expand Down
14 changes: 14 additions & 0 deletions py-polars/polars/internals/series/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,20 @@ def with_time_zone(self, tz: str | None) -> pli.Series:
"""

def cast_time_zone(self, tz: str) -> pli.Series:
    """
    Cast time zone for a Series of type Datetime.

    Different from ``with_time_zone``, this will also modify
    the underlying timestamp.

    Parameters
    ----------
    tz
        Time zone for the `Datetime` Series.

    """

def days(self) -> pli.Series:
"""
Extract the days from a Duration type.
Expand Down
4 changes: 4 additions & 0 deletions py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -886,6 +886,10 @@ impl PyExpr {
self.inner.clone().dt().cast_time_unit(tu.0).into()
}

/// Python-facing wrapper: clone the inner expression and apply
/// `dt().cast_time_zone(tz)` to it.
pub fn dt_cast_time_zone(&self, tz: String) -> PyExpr {
    let expr = self.inner.clone().dt().cast_time_zone(tz);
    expr.into()
}

pub fn rolling_apply(
&self,
py: Python,
Expand Down
12 changes: 12 additions & 0 deletions py-polars/tests/unit/test_datelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1522,3 +1522,15 @@ def test_invalid_date_parsing_4898() -> None:
assert pl.Series(["2022-09-18", "2022-09-50"]).str.strptime(
pl.Date, "%Y-%m-%d", strict=False
).to_list() == [date(2022, 9, 18), None]


def test_cast_timezone() -> None:
    # Tag a naive datetime as New York time, then cast it to UTC; column "a"
    # must stay untouched while "b" carries the casted value.
    frame = pl.DataFrame({"a": [datetime(2022, 9, 25, 14)]})
    casted = (
        pl.col("a")
        .dt.with_time_zone("America/New_York")
        .dt.cast_time_zone("UTC")
        .alias("b")
    )
    expected = {
        "a": [datetime(2022, 9, 25, 14, 0)],
        "b": [datetime(2022, 9, 25, 18, 0)],
    }
    assert frame.with_column(casted).to_dict(False) == expected

0 comments on commit 47baabd

Please sign in to comment.