Skip to content

Commit

Permalink
chore(python): Switch over some of the custom Python date/time conver…
Browse files Browse the repository at this point in the history
…sions to native PyO3 conversions (#16203)

Co-authored-by: Itamar Turner-Trauring <itamar@pythonspeed.com>
  • Loading branch information
itamarst and pythonspeed committed May 14, 2024
1 parent 3b21311 commit 004efbe
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 81 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ ndarray = { workspace = true }
num-traits = { workspace = true }
numpy = { version = "0.21", default-features = false }
once_cell = { workspace = true }
pyo3 = { workspace = true, features = ["abi3-py38", "extension-module", "multiple-pymethods"] }
pyo3 = { workspace = true, features = ["abi3-py38", "chrono", "extension-module", "multiple-pymethods"] }
pyo3-built = { version = "0.5", optional = true }
recursive = { workspace = true }
serde_json = { workspace = true, optional = true }
Expand Down
99 changes: 47 additions & 52 deletions py-polars/src/conversion/any_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,17 @@ use std::borrow::Cow;
use polars::chunked_array::object::PolarsObjectSafe;
use polars::datatypes::{DataType, Field, OwnedObject, PlHashMap, TimeUnit};
use polars::prelude::{AnyValue, Series};
use polars_core::export::chrono::{NaiveDate, NaiveTime, TimeDelta, Timelike};
use polars_core::utils::any_values_to_supertype_and_n_dtypes;
use polars_core::utils::arrow::temporal_conversions::date32_to_date;
use pyo3::exceptions::{PyOverflowError, PyTypeError};
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::types::{PyBool, PyBytes, PyDict, PyFloat, PyInt, PyList, PySequence, PyString, PyTuple};

use super::datetime::{
elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime, timestamp_to_naive_datetime,
};
use super::{decimal_to_digits, struct_dict, ObjectValue, Wrap};
use crate::error::PyPolarsErr;
use crate::py_modules::{SERIES, UTILS};
Expand Down Expand Up @@ -59,26 +64,32 @@ pub(crate) fn any_value_into_py_object(av: AnyValue, py: Python) -> PyObject {
s.into_py(py)
},
AnyValue::Date(v) => {
let convert = utils.getattr(intern!(py, "to_py_date")).unwrap();
convert.call1((v,)).unwrap().into_py(py)
let date = date32_to_date(v);
date.into_py(py)
},
AnyValue::Datetime(v, time_unit, time_zone) => {
let convert = utils.getattr(intern!(py, "to_py_datetime")).unwrap();
let time_unit = time_unit.to_ascii();
convert
.call1((v, time_unit, time_zone.as_ref().map(|s| s.as_str())))
.unwrap()
.into_py(py)
if let Some(time_zone) = time_zone {
// When https://github.com/pola-rs/polars/issues/16199 is
// implemented, we'll switch to something like:
//
// let tz: chrono_tz::Tz = time_zone.parse().unwrap();
// let datetime = tz.from_local_datetime(&naive_datetime).earliest().unwrap();
// datetime.into_py(py)
let convert = utils.getattr(intern!(py, "to_py_datetime")).unwrap();
let time_unit = time_unit.to_ascii();
convert
.call1((v, time_unit, time_zone.as_str()))
.unwrap()
.into_py(py)
} else {
timestamp_to_naive_datetime(v, time_unit).into_py(py)
}
},
AnyValue::Duration(v, time_unit) => {
let convert = utils.getattr(intern!(py, "to_py_timedelta")).unwrap();
let time_unit = time_unit.to_ascii();
convert.call1((v, time_unit)).unwrap().into_py(py)
},
AnyValue::Time(v) => {
let convert = utils.getattr(intern!(py, "to_py_time")).unwrap();
convert.call1((v,)).unwrap().into_py(py)
let time_delta = elapsed_offset_to_timedelta(v, time_unit);
time_delta.into_py(py)
},
AnyValue::Time(v) => nanos_since_midnight_to_naivetime(v).into_py(py),
AnyValue::Array(v, _) | AnyValue::List(v) => PySeries::new(v).to_list(),
ref av @ AnyValue::Struct(_, _, flds) => struct_dict(py, av._iter_struct_av(), flds),
AnyValue::StructOwned(payload) => struct_dict(py, payload.0.into_iter(), &payload.1),
Expand Down Expand Up @@ -176,19 +187,16 @@ pub(crate) fn py_object_to_any_value<'py>(
}

fn get_date(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
Python::with_gil(|py| {
let date = UTILS
.bind(py)
.getattr(intern!(py, "date_to_int"))
.unwrap()
.call1((ob,))
.unwrap();
let v = date.extract::<i32>().unwrap();
Ok(AnyValue::Date(v))
})
// unwrap() isn't yet const safe.
const UNIX_EPOCH: Option<NaiveDate> = NaiveDate::from_ymd_opt(1970, 1, 1);
let date = ob.extract::<NaiveDate>()?;
let elapsed = date.signed_duration_since(UNIX_EPOCH.unwrap());
Ok(AnyValue::Date(elapsed.num_days() as i32))
}

fn get_datetime(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
// Probably needs to wait for
// https://github.com/pola-rs/polars/issues/16199 to do it a faster way.
Python::with_gil(|py| {
let date = UTILS
.bind(py)
Expand All @@ -202,36 +210,23 @@ pub(crate) fn py_object_to_any_value<'py>(
}

fn get_timedelta(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
Python::with_gil(|py| {
let f = UTILS
.bind(py)
.getattr(intern!(py, "timedelta_to_int"))
.unwrap();
let py_int = f.call1((ob, intern!(py, "us"))).unwrap();

let av = if let Ok(v) = py_int.extract::<i64>() {
AnyValue::Duration(v, TimeUnit::Microseconds)
} else {
// This should be faster than calling `timedelta_to_int` again with `"ms"` input.
let v_us = py_int.extract::<i128>().unwrap();
let v = (v_us / 1000) as i64;
AnyValue::Duration(v, TimeUnit::Milliseconds)
};
Ok(av)
})
let timedelta = ob.extract::<TimeDelta>()?;
if let Some(micros) = timedelta.num_microseconds() {
Ok(AnyValue::Duration(micros, TimeUnit::Microseconds))
} else {
Ok(AnyValue::Duration(
timedelta.num_milliseconds(),
TimeUnit::Milliseconds,
))
}
}

fn get_time(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
Python::with_gil(|py| {
let time = UTILS
.bind(py)
.getattr(intern!(py, "time_to_int"))
.unwrap()
.call1((ob,))
.unwrap();
let v = time.extract::<i64>().unwrap();
Ok(AnyValue::Time(v))
})
let time = ob.extract::<NaiveTime>()?;

Ok(AnyValue::Time(
(time.num_seconds_from_midnight() as i64) * 1_000_000_000 + time.nanosecond() as i64,
))
}

fn get_decimal(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
Expand Down
61 changes: 34 additions & 27 deletions py-polars/src/conversion/chunked_array.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
use polars_core::export::chrono::NaiveTime;
use polars_core::utils::arrow::temporal_conversions::date32_to_date;
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::types::{PyBytes, PyList, PyTuple};

use super::datetime::{
elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime, timestamp_to_naive_datetime,
};
use super::{decimal_to_digits, struct_dict};
use crate::prelude::*;
use crate::py_modules::UTILS;
Expand Down Expand Up @@ -43,56 +48,58 @@ impl ToPyObject for Wrap<&StructChunked> {

impl ToPyObject for Wrap<&DurationChunked> {
fn to_object(&self, py: Python) -> PyObject {
let utils = UTILS.bind(py);
let convert = utils.getattr(intern!(py, "to_py_timedelta")).unwrap();
let time_unit = self.0.time_unit().to_ascii();
let time_unit = self.0.time_unit();
let iter = self
.0
.iter()
.map(|opt_v| opt_v.map(|v| convert.call1((v, time_unit)).unwrap()));
.map(|opt_v| opt_v.map(|v| elapsed_offset_to_timedelta(v, time_unit)));
PyList::new_bound(py, iter).into_py(py)
}
}

impl ToPyObject for Wrap<&DatetimeChunked> {
fn to_object(&self, py: Python) -> PyObject {
let utils = UTILS.bind(py);
let convert = utils.getattr(intern!(py, "to_py_datetime")).unwrap();
let time_unit = self.0.time_unit().to_ascii();
let time_zone = self.0.time_zone().to_object(py);
let iter = self
.0
.iter()
.map(|opt_v| opt_v.map(|v| convert.call1((v, time_unit, &time_zone)).unwrap()));
PyList::new_bound(py, iter).into_py(py)
let time_zone = self.0.time_zone();
if time_zone.is_some() {
// Switch to more efficient code path in
// https://github.com/pola-rs/polars/issues/16199
let utils = UTILS.bind(py);
let convert = utils.getattr(intern!(py, "to_py_datetime")).unwrap();
let time_unit = self.0.time_unit().to_ascii();
let time_zone = time_zone.to_object(py);
let iter = self
.0
.iter()
.map(|opt_v| opt_v.map(|v| convert.call1((v, time_unit, &time_zone)).unwrap()));
PyList::new_bound(py, iter).into_py(py)
} else {
let time_unit = self.0.time_unit();
let iter = self
.0
.iter()
.map(|opt_v| opt_v.map(|v| timestamp_to_naive_datetime(v, time_unit)));
PyList::new_bound(py, iter).into_py(py)
}
}
}

impl ToPyObject for Wrap<&TimeChunked> {
fn to_object(&self, py: Python) -> PyObject {
let iter = time_to_pyobject_iter(py, self.0);
let iter = time_to_pyobject_iter(self.0);
PyList::new_bound(py, iter).into_py(py)
}
}

pub(crate) fn time_to_pyobject_iter<'a>(
py: Python<'a>,
ca: &'a TimeChunked,
) -> impl ExactSizeIterator<Item = Option<Bound<'a, PyAny>>> {
let utils = UTILS.bind(py);
let convert = utils.getattr(intern!(py, "to_py_time")).unwrap().clone();
pub(crate) fn time_to_pyobject_iter(
ca: &TimeChunked,
) -> impl '_ + ExactSizeIterator<Item = Option<NaiveTime>> {
ca.0.iter()
.map(move |opt_v| opt_v.map(|v| convert.call1((v,)).unwrap()))
.map(move |opt_v| opt_v.map(nanos_since_midnight_to_naivetime))
}

impl ToPyObject for Wrap<&DateChunked> {
fn to_object(&self, py: Python) -> PyObject {
let utils = UTILS.bind(py);
let convert = utils.getattr(intern!(py, "to_py_date")).unwrap();
let iter = self
.0
.into_iter()
.map(|opt_v| opt_v.map(|v| convert.call1((v,)).unwrap()));
let iter = self.0.into_iter().map(|opt_v| opt_v.map(date32_to_date));
PyList::new_bound(py, iter).into_py(py)
}
}
Expand Down
31 changes: 31 additions & 0 deletions py-polars/src/conversion/datetime.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//! Utilities for converting dates, times, datetimes, and so on.

use polars::datatypes::TimeUnit;
use polars_core::export::chrono::{NaiveDateTime, NaiveTime, TimeDelta};

/// Convert a signed count of `time_unit` ticks into a chrono [`TimeDelta`].
///
/// The count is split into whole seconds (rounded toward negative infinity)
/// and a non-negative sub-second remainder expressed in nanoseconds, which is
/// the pair `TimeDelta::new` is called with.
///
/// # Panics
///
/// Panics if `TimeDelta::new` returns `None` for the computed pair.
pub fn elapsed_offset_to_timedelta(elapsed: i64, time_unit: TimeUnit) -> TimeDelta {
    const NANOS_PER_SECOND: i64 = 1_000_000_000;
    let ticks_per_second: i64 = match time_unit {
        TimeUnit::Nanoseconds => 1_000_000_000,
        TimeUnit::Microseconds => 1_000_000,
        TimeUnit::Milliseconds => 1_000,
    };
    let nanos_per_tick = NANOS_PER_SECOND / ticks_per_second;
    // Euclidean division floors the seconds and keeps the remainder in
    // `0..ticks_per_second`, so the nanosecond part is never negative.
    let whole_seconds = elapsed.div_euclid(ticks_per_second);
    let subsec_nanos = elapsed.rem_euclid(ticks_per_second) * nanos_per_tick;
    TimeDelta::new(whole_seconds, subsec_nanos as u32).unwrap()
}

/// Turn a count of `time_unit` ticks since the Unix epoch into a [`NaiveDateTime`].
///
/// The count is first converted to a `TimeDelta` and then added to
/// `NaiveDateTime::UNIX_EPOCH`.
pub fn timestamp_to_naive_datetime(since_epoch: i64, time_unit: TimeUnit) -> NaiveDateTime {
    let offset_from_epoch = elapsed_offset_to_timedelta(since_epoch, time_unit);
    NaiveDateTime::UNIX_EPOCH + offset_from_epoch
}

/// Turn a nanoseconds-since-midnight count into a [`NaiveTime`].
///
/// The count is converted to a `TimeDelta` and added to the 00:00:00 time.
pub fn nanos_since_midnight_to_naivetime(nanos_since_midnight: i64) -> NaiveTime {
    let midnight = NaiveTime::from_hms_opt(0, 0, 0).unwrap();
    let since_midnight = elapsed_offset_to_timedelta(nanos_since_midnight, TimeUnit::Nanoseconds);
    midnight + since_midnight
}
1 change: 1 addition & 0 deletions py-polars/src/conversion/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pub(crate) mod any_value;
pub(crate) mod chunked_array;
mod datetime;
use std::fmt::{Display, Formatter};
use std::hash::{Hash, Hasher};

Expand Down
2 changes: 1 addition & 1 deletion py-polars/src/series/export.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ fn series_to_numpy_with_copy(py: Python, s: &Series) -> PyResult<PyObject> {
},
Time => {
let ca = s.time().unwrap();
let values = time_to_pyobject_iter(py, ca).map(|v| v.into_py(py));
let values = time_to_pyobject_iter(ca).map(|v| v.into_py(py));
PyArray1::from_iter_bound(py, values).into_py(py)
},
String => {
Expand Down

0 comments on commit 004efbe

Please sign in to comment.