Skip to content

Commit

Permalink
dates: buckets -> truncate
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 18, 2021
1 parent 921d248 commit 14b5c70
Show file tree
Hide file tree
Showing 10 changed files with 197 additions and 19 deletions.
4 changes: 2 additions & 2 deletions polars/polars-core/src/chunked_array/temporal/mod.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
//! Traits and utilities for temporal data.
#[cfg(feature = "temporal")]
pub mod buckets;
pub mod conversion;
#[cfg(feature = "dtype-date")]
mod date;
#[cfg(feature = "dtype-datetime")]
mod datetime;
#[cfg(feature = "dtype-time")]
mod time;
#[cfg(feature = "temporal")]
pub mod truncate;
mod utf8;

pub use self::conversion::*;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ use polars_time::{Duration, Window};

#[cfg(feature = "dtype-datetime")]
impl DatetimeChunked {
// Diff view: the `buckets` signature is the pre-rename line; the `truncate` signature replaces it.
pub fn buckets(&self, every: Duration, offset: Duration) -> Self {
/// Truncates each value of this array using a [`Window`] built from `every` and `offset`.
///
/// Returns a new array of the same type as `self`; `self` is only borrowed.
pub fn truncate(&self, every: Duration, offset: Duration) -> Self {
// `every` is passed for both of the first two `Window::new` arguments.
// NOTE(review): presumably this makes the window period equal to its stride — confirm against `Window::new`.
let w = Window::new(every, every, offset);
// Element-wise truncation through the window.
// NOTE(review): the result is converted with `into_date()` inside the datetime impl — confirm this yields the intended logical type.
self.apply(|t| w.truncate(t)).into_date()
}
}

#[cfg(feature = "dtype-date")]
impl DateChunked {
pub fn buckets(&self, every: Duration, offset: Duration) -> Self {
pub fn truncate(&self, every: Duration, offset: Duration) -> Self {
let w = Window::new(every, every, offset);
self.apply(|t| {
const NSECS_IN_DAY: i64 = NANOSECONDS * SECONDS_IN_DAY;
Expand Down
2 changes: 1 addition & 1 deletion py-polars/docs/source/reference/expression.rst
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ The following methods are available under the `expr.dt` attribute.
ExprDateTimeNameSpace.nanosecond
ExprDateTimeNameSpace.to_python_datetime
ExprDateTimeNameSpace.timestamp
ExprDateTimeNameSpace.buckets
ExprDateTimeNameSpace.truncate
ExprDateTimeNameSpace.epoch_days
ExprDateTimeNameSpace.epoch_milliseconds
ExprDateTimeNameSpace.epoch_seconds
Expand Down
2 changes: 1 addition & 1 deletion py-polars/docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ The following methods are available under the `Series.dt` attribute.
DateTimeNameSpace.max
DateTimeNameSpace.median
DateTimeNameSpace.mean
DateTimeNameSpace.buckets
DateTimeNameSpace.truncate
DateTimeNameSpace.epoch_days
DateTimeNameSpace.epoch_milliseconds
DateTimeNameSpace.epoch_seconds
Expand Down
87 changes: 83 additions & 4 deletions py-polars/polars/internals/expr.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import copy
import typing as tp
from datetime import date, datetime
from datetime import date, datetime, timedelta
from typing import Any, Callable, Optional, Sequence, Type, Union

import numpy as np

from polars.utils import _timedelta_to_pl_duration

try:
from polars.polars import PyExpr

Expand Down Expand Up @@ -2441,13 +2443,17 @@ class ExprDateTimeNameSpace:
def __init__(self, expr: Expr):
self._pyexpr = expr._pyexpr

def buckets(self, every: str, offset: Optional[str] = None) -> Expr:
def truncate(
self,
every: Union[str, timedelta],
offset: Optional[Union[str, timedelta]] = None,
) -> Expr:
"""
.. warning::
This API is experimental and may change without it being considered a breaking change.
Divide the date/ datetime range into buckets.
Data will be sorted by this operation.
Data must be sorted; otherwise the output does not make sense.
The `every` and `offset` arguments are created with
the following string language:
Expand Down Expand Up @@ -2476,10 +2482,83 @@ def buckets(self, every: str, offset: Optional[str] = None) -> Expr:
-------
Date/Datetime series
Examples
--------
>>> from datetime import timedelta, datetime
>>> start = datetime(2001, 1, 1)
>>> stop = datetime(2001, 1, 2)
>>> s = pl.date_range(start, stop, timedelta(minutes=30), name="dates")
>>> s
shape: (49,)
Series: 'dates' [datetime]
[
2001-01-01 00:00:00
2001-01-01 00:30:00
2001-01-01 01:00:00
2001-01-01 01:30:00
2001-01-01 02:00:00
2001-01-01 02:30:00
2001-01-01 03:00:00
2001-01-01 03:30:00
2001-01-01 04:00:00
2001-01-01 04:30:00
2001-01-01 05:00:00
2001-01-01 05:30:00
...
2001-01-01 18:30:00
2001-01-01 19:00:00
2001-01-01 19:30:00
2001-01-01 20:00:00
2001-01-01 20:30:00
2001-01-01 21:00:00
2001-01-01 21:30:00
2001-01-01 22:00:00
2001-01-01 22:30:00
2001-01-01 23:00:00
2001-01-01 23:30:00
2001-01-02 00:00:00
]
>>> s.dt.truncate("1h")
shape: (49,)
Series: 'dates' [datetime]
[
2001-01-01 00:00:00
2001-01-01 00:00:00
2001-01-01 01:00:00
2001-01-01 01:00:00
2001-01-01 02:00:00
2001-01-01 02:00:00
2001-01-01 03:00:00
2001-01-01 03:00:00
2001-01-01 04:00:00
2001-01-01 04:00:00
2001-01-01 05:00:00
2001-01-01 05:00:00
...
2001-01-01 18:00:00
2001-01-01 19:00:00
2001-01-01 19:00:00
2001-01-01 20:00:00
2001-01-01 20:00:00
2001-01-01 21:00:00
2001-01-01 21:00:00
2001-01-01 22:00:00
2001-01-01 22:00:00
2001-01-01 23:00:00
2001-01-01 23:00:00
2001-01-02 00:00:00
]
>>> assert s.dt.truncate("1h") == s.dt.truncate(timedelta(hours=1))
"""
if offset is None:
offset = "0ns"
return wrap_expr(self._pyexpr.date_buckets(every, offset))
if isinstance(every, timedelta):
every = _timedelta_to_pl_duration(every)
if isinstance(offset, timedelta):
offset = _timedelta_to_pl_duration(offset)
return wrap_expr(self._pyexpr.date_truncate(every, offset))

def strftime(self, fmt: str) -> Expr:
"""
Expand Down
3 changes: 3 additions & 0 deletions py-polars/polars/internals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ def date_range(
"""
Create a date range of type `Datetime`.
.. warning::
This API is experimental and may change without it being considered a breaking change.
Parameters
----------
low
Expand Down
84 changes: 79 additions & 5 deletions py-polars/polars/internals/series.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import sys
import typing as tp
from datetime import date, datetime
from datetime import date, datetime, timedelta
from numbers import Number
from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Type, Union

Expand Down Expand Up @@ -3495,13 +3495,17 @@ class DateTimeNameSpace:
def __init__(self, series: Series):
self._s = series._s

def buckets(self, every: str, offset: Optional[str] = None) -> Series:
def truncate(
self,
every: Union[str, timedelta],
offset: Optional[Union[str, timedelta]] = None,
) -> Series:
"""
.. warning::
This API is experimental and will likely change.
This API is experimental and may change without it being considered a breaking change.
Divide the date/ datetime range into buckets.
Data will be sorted by this operation.
Data must be sorted; otherwise the output does not make sense.
The `every` and `offset` arguments are created with
the following string language:
Expand Down Expand Up @@ -3529,9 +3533,79 @@ def buckets(self, every: str, offset: Optional[str] = None) -> Series:
Returns
-------
Date/Datetime series
Examples
--------
>>> from datetime import timedelta, datetime
>>> start = datetime(2001, 1, 1)
>>> stop = datetime(2001, 1, 2)
>>> s = pl.date_range(start, stop, timedelta(minutes=30), name="dates")
>>> s
shape: (49,)
Series: 'dates' [datetime]
[
2001-01-01 00:00:00
2001-01-01 00:30:00
2001-01-01 01:00:00
2001-01-01 01:30:00
2001-01-01 02:00:00
2001-01-01 02:30:00
2001-01-01 03:00:00
2001-01-01 03:30:00
2001-01-01 04:00:00
2001-01-01 04:30:00
2001-01-01 05:00:00
2001-01-01 05:30:00
...
2001-01-01 18:30:00
2001-01-01 19:00:00
2001-01-01 19:30:00
2001-01-01 20:00:00
2001-01-01 20:30:00
2001-01-01 21:00:00
2001-01-01 21:30:00
2001-01-01 22:00:00
2001-01-01 22:30:00
2001-01-01 23:00:00
2001-01-01 23:30:00
2001-01-02 00:00:00
]
>>> s.dt.truncate("1h")
shape: (49,)
Series: 'dates' [datetime]
[
2001-01-01 00:00:00
2001-01-01 00:00:00
2001-01-01 01:00:00
2001-01-01 01:00:00
2001-01-01 02:00:00
2001-01-01 02:00:00
2001-01-01 03:00:00
2001-01-01 03:00:00
2001-01-01 04:00:00
2001-01-01 04:00:00
2001-01-01 05:00:00
2001-01-01 05:00:00
...
2001-01-01 18:00:00
2001-01-01 19:00:00
2001-01-01 19:00:00
2001-01-01 20:00:00
2001-01-01 20:00:00
2001-01-01 21:00:00
2001-01-01 21:00:00
2001-01-01 22:00:00
2001-01-01 22:00:00
2001-01-01 23:00:00
2001-01-01 23:00:00
2001-01-02 00:00:00
]
>>> assert s.dt.truncate("1h") == s.dt.truncate(timedelta(hours=1))
"""
return pli.select(
pli.lit(wrap_s(self._s)).dt.buckets(every, offset)
pli.lit(wrap_s(self._s)).dt.truncate(every, offset)
).to_series()

def __getitem__(self, item: int) -> Union[date, datetime]:
Expand Down
5 changes: 5 additions & 0 deletions py-polars/polars/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import ctypes
import typing as tp
from datetime import timedelta
from typing import Any, Dict, Tuple, Union

import numpy as np
Expand Down Expand Up @@ -35,3 +36,7 @@ def _ptr_to_numpy(ptr: int, len: int, ptr_type: Any) -> np.ndarray:
"""
ptr_ctype = ctypes.cast(ptr, ctypes.POINTER(ptr_type))
return np.ctypeslib.as_array(ptr_ctype, (len,))


def _timedelta_to_pl_duration(td: timedelta) -> str:
return f"{td.days}d{td.seconds}s{td.microseconds}us"
6 changes: 3 additions & 3 deletions py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -986,17 +986,17 @@ impl PyExpr {
self.inner.clone().str_concat(delimiter).into()
}

fn date_buckets(&self, every: &str, offset: &str) -> Self {
fn date_truncate(&self, every: &str, offset: &str) -> Self {
let every = Duration::parse(every);
let offset = Duration::parse(offset);
self.inner
.clone()
.apply(
move |s| match s.dtype() {
DataType::Datetime => {
Ok(s.datetime().unwrap().buckets(every, offset).into_series())
Ok(s.datetime().unwrap().truncate(every, offset).into_series())
}
DataType::Date => Ok(s.date().unwrap().buckets(every, offset).into_series()),
DataType::Date => Ok(s.date().unwrap().truncate(every, offset).into_series()),
dt => Err(PolarsError::ComputeError(
format!("expected date/datetime got {:?}", dt).into(),
)),
Expand Down
19 changes: 18 additions & 1 deletion py-polars/tests/test_datelike.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import date, datetime
from datetime import date, datetime, timedelta

import numpy as np
import pyarrow as pa
Expand Down Expand Up @@ -178,3 +178,20 @@ def test_to_numpy() -> None:
str(s1.to_numpy()[:2])
== "['1970-01-01T00:02:03.543' '1970-01-01T00:04:43.478']"
)


def test_truncate() -> None:
    """Hourly truncation of a 30-minute range agrees for string and timedelta input."""
    start = datetime(2001, 1, 1)
    stop = datetime(2001, 1, 2)
    s = pl.date_range(start, stop, timedelta(minutes=30), name="dates")

    one_hour = timedelta(hours=1)
    # (index, expected value): consecutive 30-minute samples collapse onto the same hour
    expected = (
        (0, start),
        (1, start),
        (2, start + one_hour),
        (3, start + one_hour),
        # ...
        (-3, stop - one_hour),
        (-2, stop - one_hour),
        (-1, stop),
    )

    # we can pass strings and timedeltas
    from_str = s.dt.truncate("1h")
    from_timedelta = s.dt.truncate(timedelta(hours=1))
    for out in (from_str, from_timedelta):
        for idx, want in expected:
            assert out.dt[idx] == want

0 comments on commit 14b5c70

Please sign in to comment.