Skip to content

Commit

Permalink
fix(rust, python): tz-aware strftime (#5525)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Nov 16, 2022
1 parent 8efb0a0 commit bb563c3
Show file tree
Hide file tree
Showing 9 changed files with 135 additions and 73 deletions.
3 changes: 3 additions & 0 deletions polars/polars-arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ description = "Arrow interfaces for Polars DataFrame library"

[dependencies]
arrow.workspace = true
chrono = { version = "0.4", optional = true }
chrono-tz = { version = "0.6", optional = true }
hashbrown.workspace = true
num.workspace = true
serde = { version = "1", features = ["derive"], optional = true }
Expand All @@ -23,3 +25,4 @@ temporal = ["arrow/compute_temporal"]
bigidx = []
performant = []
like = ["arrow/compute_like"]
timezones = ["chrono-tz", "chrono"]
4 changes: 4 additions & 0 deletions polars/polars-arrow/src/kernels/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ pub mod sorted_join;
#[cfg(feature = "strings")]
pub mod string;
pub mod take_agg;
#[cfg(feature = "timezones")]
mod time;
#[cfg(feature = "timezones")]
pub use time::cast_timezone;

/// Internal state of [SlicesIterator]
#[derive(Debug, PartialEq)]
Expand Down
51 changes: 51 additions & 0 deletions polars/polars-arrow/src/kernels/time.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
use arrow::array::PrimitiveArray;
use arrow::compute::arity::unary;
use arrow::datatypes::{DataType as ArrowDataType, TimeUnit};
use arrow::temporal_conversions::{
timestamp_ms_to_datetime, timestamp_ns_to_datetime, timestamp_us_to_datetime,
};

use crate::prelude::ArrayRef;

/// Re-expresses every timestamp in `arr` as wall-clock time in another time zone.
///
/// Each value is decoded as a naive datetime using unit `tu`, interpreted as a
/// local time in `from`, converted to the same instant in `to`, and the local
/// naive datetime in `to` is encoded again in the same unit. The output array
/// is built with the `Int64` arrow data type.
///
/// # Panics
///
/// * If the result of `from.from_local_datetime(..)` cannot be unwrapped to a
///   single datetime.
/// * If `tu` is anything other than millisecond, microsecond or nanosecond.
pub fn cast_timezone(
    arr: &PrimitiveArray<i64>,
    tu: TimeUnit,
    from: chrono_tz::Tz,
    to: chrono_tz::Tz,
) -> ArrayRef {
    use chrono::{NaiveDateTime, TimeZone};

    // The zone arithmetic is identical for every unit; only the
    // decode/encode step around it differs per arm.
    let relocate = |local: NaiveDateTime| -> NaiveDateTime {
        from.from_local_datetime(&local)
            .unwrap()
            .with_timezone(&to)
            .naive_local()
    };

    match tu {
        TimeUnit::Millisecond => Box::new(unary(
            arr,
            |ts| relocate(timestamp_ms_to_datetime(ts)).timestamp_millis(),
            ArrowDataType::Int64,
        )),
        TimeUnit::Microsecond => Box::new(unary(
            arr,
            |ts| relocate(timestamp_us_to_datetime(ts)).timestamp_micros(),
            ArrowDataType::Int64,
        )),
        TimeUnit::Nanosecond => Box::new(unary(
            arr,
            |ts| relocate(timestamp_ns_to_datetime(ts)).timestamp_nanos(),
            ArrowDataType::Int64,
        )),
        _ => unreachable!(),
    }
}
2 changes: 1 addition & 1 deletion polars/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ partition_by = []
semi_anti_join = []
chunked_ids = []
describe = []
timezones = ["chrono-tz", "arrow/chrono-tz"]
timezones = ["chrono-tz", "arrow/chrono-tz", "polars-arrow/timezones"]

dynamic_groupby = ["dtype-datetime", "dtype-date"]

Expand Down
41 changes: 40 additions & 1 deletion polars/polars-core/src/chunked_array/temporal/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ use std::fmt::Write;
use arrow::temporal_conversions::{
timestamp_ms_to_datetime, timestamp_ns_to_datetime, timestamp_us_to_datetime,
};
#[cfg(feature = "timezones")]
use polars_arrow::kernels::cast_timezone;

use super::conversion::{datetime_to_timestamp_ms, datetime_to_timestamp_ns};
use super::*;
Expand Down Expand Up @@ -40,6 +42,33 @@ impl DatetimeChunked {
}
}

/// Runs `cast_time_zone(tz)` on this array and then re-labels the result with
/// the time zone the array carried before the call.
///
/// # Errors
///
/// Returns whatever error `cast_time_zone` produces.
#[cfg(feature = "timezones")]
pub fn apply_tz_offset(&self, tz: &str) -> PolarsResult<DatetimeChunked> {
    // Capture the current label first so it can be re-attached afterwards.
    let original_tz = self.time_zone().clone();
    let shifted = self.cast_time_zone(tz)?;
    Ok(shifted.with_time_zone(original_tz))
}

/// Converts the values from the array's current time zone to `tz` and labels
/// the result with `tz`.
///
/// # Errors
///
/// Returns `PolarsError::ComputeError` when
/// * the array has no time zone set,
/// * the array's current time zone string cannot be parsed, or
/// * `tz` cannot be parsed.
///
/// A parse error names the string that actually failed to parse.
#[cfg(feature = "timezones")]
pub fn cast_time_zone(&self, tz: &str) -> PolarsResult<DatetimeChunked> {
    use chrono_tz::Tz;

    // Shared parser so that the error always reports the offending name
    // (previously a bad source zone was reported using the target `tz`).
    fn parse_tz(name: &str) -> PolarsResult<Tz> {
        name.parse::<Tz>().map_err(|_| {
            PolarsError::ComputeError(format!("Could not parse timezone: '{}'", name).into())
        })
    }

    if let Some(from) = self.time_zone() {
        let from = parse_tz(from)?;
        let to = parse_tz(tz)?;
        let out =
            self.apply_kernel(&|arr| cast_timezone(arr, self.time_unit().to_arrow(), from, to));
        Ok(out.into_datetime(self.time_unit(), Some(tz.to_string())))
    } else {
        Err(PolarsError::ComputeError(
            "Cannot cast Naive Datetime. First set a timezone".into(),
        ))
    }
}

/// Format Datetime with a `fmt` rule. See [chrono strftime/strptime](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html).
pub fn strftime(&self, fmt: &str) -> Utf8Chunked {
let conversion_f = match self.time_unit() {
Expand All @@ -54,7 +83,17 @@ impl DatetimeChunked {
.unwrap();
let fmted = format!("{}", dt.format(fmt));

let mut ca: Utf8Chunked = self.apply_kernel_cast(&|arr| {
#[allow(unused_mut)]
let mut ca = self.clone();
#[cfg(feature = "timezones")]
if let Some(tz) = self.time_zone() {
ca = ca
.with_time_zone(Some("UTC".into()))
.cast_time_zone(tz)
.unwrap();
}

let mut ca: Utf8Chunked = ca.apply_kernel_cast(&|arr| {
let mut buf = String::new();
let mut mutarr =
MutableUtf8Array::with_capacities(arr.len(), arr.len() * fmted.len() + 1);
Expand Down
28 changes: 0 additions & 28 deletions polars/polars-time/src/chunkedarray/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,34 +144,6 @@ pub trait DatetimeMethods: AsDatetime {
)
.into_datetime(tu, None)
}

/// Runs `cast_time_zone(tz)` and then re-labels the result with the time zone
/// the underlying datetime array carried before the call.
///
/// # Errors
///
/// Returns whatever error `cast_time_zone` produces.
#[cfg(feature = "timezones")]
fn apply_tz_offset(&self, tz: &str) -> PolarsResult<DatetimeChunked> {
    // Capture the current label first so it can be re-attached afterwards.
    let original_tz = self.as_datetime().time_zone().clone();
    let shifted = self.cast_time_zone(tz)?;
    Ok(shifted.with_time_zone(original_tz))
}

/// Converts the values from the array's current time zone to `tz` and labels
/// the result with `tz`.
///
/// # Errors
///
/// Returns `PolarsError::ComputeError` when
/// * the array has no time zone set,
/// * the array's current time zone string cannot be parsed, or
/// * `tz` cannot be parsed.
///
/// A parse error names the string that actually failed to parse.
#[cfg(feature = "timezones")]
fn cast_time_zone(&self, tz: &str) -> PolarsResult<DatetimeChunked> {
    use chrono_tz::Tz;

    // Shared parser so that the error always reports the offending name
    // (previously a bad source zone was reported using the target `tz`).
    fn parse_tz(name: &str) -> PolarsResult<Tz> {
        name.parse::<Tz>().map_err(|_| {
            PolarsError::ComputeError(format!("Could not parse timezone: '{}'", name).into())
        })
    }

    let ca = self.as_datetime();

    if let Some(from) = ca.time_zone() {
        let from = parse_tz(from)?;
        let to = parse_tz(tz)?;
        let out = ca.apply_kernel(&|arr| kernels::cast_timezone(arr, ca.time_unit(), from, to));
        Ok(out.into_datetime(ca.time_unit(), Some(tz.to_string())))
    } else {
        Err(PolarsError::ComputeError(
            "Cannot cast Naive Datetime. First set a timezone".into(),
        ))
    }
}
}

pub trait AsDatetime {
Expand Down
43 changes: 0 additions & 43 deletions polars/polars-time/src/chunkedarray/kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,46 +225,3 @@ to_temporal_unit!(
i64,
ArrowDataType::Int32
);

/// Re-expresses every timestamp in `arr` as wall-clock time in another time zone.
///
/// Each value is decoded as a naive datetime using unit `tu`, interpreted as a
/// local time in `from`, converted to the same instant in `to`, and the local
/// naive datetime in `to` is encoded again in the same unit. The output array
/// is built with the `Int64` arrow data type.
///
/// # Panics
///
/// Panics if the result of `from.from_local_datetime(..)` cannot be unwrapped
/// to a single datetime.
#[cfg(all(feature = "dtype-datetime", feature = "timezones"))]
pub(crate) fn cast_timezone(
    arr: &PrimitiveArray<i64>,
    tu: TimeUnit,
    from: chrono_tz::Tz,
    to: chrono_tz::Tz,
) -> ArrayRef {
    use chrono::{NaiveDateTime, TimeZone};

    // The zone arithmetic is identical for every unit; only the
    // decode/encode step around it differs per arm.
    let relocate = |local: NaiveDateTime| -> NaiveDateTime {
        from.from_local_datetime(&local)
            .unwrap()
            .with_timezone(&to)
            .naive_local()
    };

    match tu {
        TimeUnit::Milliseconds => Box::new(unary(
            arr,
            |ts| relocate(timestamp_ms_to_datetime(ts)).timestamp_millis(),
            ArrowDataType::Int64,
        )),
        TimeUnit::Microseconds => Box::new(unary(
            arr,
            |ts| relocate(timestamp_us_to_datetime(ts)).timestamp_micros(),
            ArrowDataType::Int64,
        )),
        TimeUnit::Nanoseconds => Box::new(unary(
            arr,
            |ts| relocate(timestamp_ns_to_datetime(ts)).timestamp_nanos(),
            ArrowDataType::Int64,
        )),
    }
}
2 changes: 2 additions & 0 deletions py-polars/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions py-polars/tests/unit/test_datelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1916,3 +1916,37 @@ def test_tz_aware_truncate() -> None:
),
],
}


def test_tz_aware_strftime() -> None:
    """Check that ``strftime`` on a tz-aware column formats the local wall-clock time."""
    new_york = zoneinfo.ZoneInfo(key="America/New_York")
    df = pl.DataFrame(
        {
            "dt": pl.date_range(
                low=datetime(2022, 11, 1), high=datetime(2022, 11, 4), interval="24h"
            ).dt.tz_localize("America/New_York")
        }
    )
    assert df.with_column(pl.col("dt").dt.strftime("%c").alias("fmt")).to_dict(
        False
    ) == {
        "dt": [
            datetime(2022, 11, 1, 0, 0, tzinfo=new_york),
            datetime(2022, 11, 2, 0, 0, tzinfo=new_york),
            datetime(2022, 11, 3, 0, 0, tzinfo=new_york),
            datetime(2022, 11, 4, 0, 0, tzinfo=new_york),
        ],
        # `%c` renders the day of month space-padded to width two, hence the
        # double space in front of single-digit days.
        "fmt": [
            "Tue Nov  1 00:00:00 2022",
            "Wed Nov  2 00:00:00 2022",
            "Thu Nov  3 00:00:00 2022",
            "Fri Nov  4 00:00:00 2022",
        ],
    }

0 comments on commit bb563c3

Please sign in to comment.