Skip to content

Commit

Permalink
polars: create date_range natively
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 18, 2021
1 parent 14b5c70 commit 62bc6b2
Show file tree
Hide file tree
Showing 12 changed files with 105 additions and 44 deletions.
2 changes: 2 additions & 0 deletions polars/polars-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ pub mod series;
pub mod testing;
#[cfg(test)]
mod tests;
#[cfg(feature = "temporal")]
pub mod time;
pub(crate) mod vector_hasher;

#[cfg(any(feature = "dtype-categorical", feature = "object"))]
Expand Down
13 changes: 13 additions & 0 deletions polars/polars-core/src/time.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
use crate::datatypes::Int64Chunked;
use crate::prelude::DatetimeChunked;
pub use polars_time::*;

/// Build a [`DatetimeChunked`] called `name` holding the points between `start` and `stop`,
/// spaced `every` apart.
///
/// The raw values are produced by `date_range_vec`; `closed` is forwarded to it and decides
/// which of the two bounds may appear in the output.
pub fn date_range(
    start: TimeNanoseconds,
    stop: TimeNanoseconds,
    every: Duration,
    closed: ClosedWindow,
    name: &str,
) -> DatetimeChunked {
    // Compute the raw i64 values first, then wrap them and reinterpret as datetimes.
    let timestamps = date_range_vec(start, stop, every, closed);
    let physical = Int64Chunked::new_vec(name, timestamps);
    physical.into_date()
}
16 changes: 15 additions & 1 deletion polars/polars-time/src/calendar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,21 @@ pub fn date_range(
t += every.duration()
}
}
_ => unimplemented!(),
ClosedWindow::Right => {
t += every.duration();
while t <= stop {
ts.push(t);
t += every.duration()
}
}
ClosedWindow::None => {
t += every.duration();
while t < stop {
ts.push(t);
t += every.duration()
}
}
}
debug_assert!(size >= ts.len());
ts
}
1 change: 1 addition & 0 deletions polars/polars-time/src/duration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ impl Duration {
}

/// Approximate length of this duration, expressed as `TimeNanoseconds`.
///
/// Each month is counted as 30 days of 24 hours, so the result is only a rough
/// estimate whenever `months != 0`.
#[inline]
pub const fn duration(&self) -> TimeNanoseconds {
    // months -> days -> hours -> seconds -> nanoseconds
    let month_part = self.months * 30 * 24 * 3600 * NS_SECOND;
    month_part + self.nsecs
}
Expand Down
5 changes: 4 additions & 1 deletion polars/polars-time/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,7 @@ mod test;
mod unit;
mod window;

pub use {calendar::date_range, duration::Duration, window::Window};
pub use {
calendar::date_range as date_range_vec, duration::Duration, groupby::ClosedWindow,
unit::TimeNanoseconds, window::Window,
};
39 changes: 19 additions & 20 deletions py-polars/polars/internals/functions.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
from datetime import datetime, timedelta
from typing import Optional, Sequence, Union, overload

import numpy as np

from polars import internals as pli
from polars.datatypes import Datetime, py_type_to_dtype
from polars.datatypes import py_type_to_dtype
from polars.utils import _datetime_to_pl_timestamp, _timedelta_to_pl_duration

try:
from polars.polars import concat_df as _concat_df
from polars.polars import concat_lf as _concat_lf
from polars.polars import concat_series as _concat_series
from polars.polars import py_date_range as _py_date_range
from polars.polars import py_diag_concat_df as _diag_concat_df

_DOCUMENTING = False
Expand Down Expand Up @@ -135,16 +135,13 @@ def arg_where(mask: "pli.Series") -> "pli.Series":
def date_range(
low: datetime,
high: datetime,
interval: timedelta,
closed: Optional[str] = None,
interval: Union[str, timedelta],
closed: Optional[str] = "both",
name: Optional[str] = None,
) -> "pli.Series":
"""
Create a date range of type `Datetime`.
.. warning::
This API is experimental and may change without it being considered a breaking change.
Parameters
----------
low
Expand All @@ -153,8 +150,10 @@ def date_range(
Upper bound of the date range
interval
Interval periods
closed {None, 'left', 'right'}
Make the interval closed to the 'left', 'right', or both sides (None, the default).
A python timedelta object or a polars duration `str`
e.g.: "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
closed {'both', 'left', 'right', 'none'}
Make the interval closed to the 'left', 'right', 'none' or 'both' sides.
name
Name of the output Series
Expand All @@ -164,10 +163,8 @@ def date_range(
Examples
--------
>>> from datetime import datetime, timedelta
>>> pl.date_range(
... datetime(1985, 1, 1), datetime(2015, 7, 1), timedelta(days=1, hours=12)
... )
>>> from datetime import datetime
>>> pl.date_range(datetime(1985, 1, 1), datetime(2015, 7, 1), "1d12h")
shape: (7426,)
Series: '' [datetime]
[
Expand Down Expand Up @@ -199,9 +196,11 @@ def date_range(
]
"""
values = np.arange(low, high, interval, dtype="datetime64[ns]")
if closed in (None, "right") and (high - low) % interval == timedelta(0):
values = np.append(values, np.array(high, dtype="datetime64[ns]"))
if closed == "right":
values = values[1:]
return pli.Series(name=name, values=values.astype(np.int64)).cast(Datetime)
if isinstance(interval, timedelta):
interval = _timedelta_to_pl_duration(interval)
start = _datetime_to_pl_timestamp(low)
stop = _datetime_to_pl_timestamp(high)
if name is None:
name = ""

return pli.wrap_s(_py_date_range(start, stop, interval, closed, name))
9 changes: 8 additions & 1 deletion py-polars/polars/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import ctypes
import typing as tp
from datetime import timedelta
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, Tuple, Union

import numpy as np
Expand Down Expand Up @@ -40,3 +40,10 @@ def _ptr_to_numpy(ptr: int, len: int, ptr_type: Any) -> np.ndarray:

def _timedelta_to_pl_duration(td: timedelta) -> str:
return f"{td.days}d{td.seconds}s{td.microseconds}us"


def _datetime_to_pl_timestamp(dt: datetime) -> int:
"""
Converts a python datetime to a timestamp in nanoseconds
"""
return int(dt.replace(tzinfo=timezone.utc).timestamp() * 1e9)
13 changes: 13 additions & 0 deletions py-polars/src/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,19 @@ impl ToPyObject for Wrap<DataType> {
}
}

impl FromPyObject<'_> for Wrap<ClosedWindow> {
fn extract(ob: &'_ PyAny) -> PyResult<Self> {
let s = ob.extract::<&str>()?;
Ok(Wrap(match s {
"none" => ClosedWindow::None,
"both" => ClosedWindow::Both,
"left" => ClosedWindow::Left,
"right" => ClosedWindow::Right,
_ => panic!("{}", "closed should be any of {'none', 'left', 'right'}"),
}))
}
}

impl FromPyObject<'_> for Wrap<DataType> {
fn extract(ob: &PyAny) -> PyResult<Self> {
let dtype = match ob.repr().unwrap().to_str().unwrap() {
Expand Down
10 changes: 2 additions & 8 deletions py-polars/src/lazy/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -306,16 +306,10 @@ impl PyLazyFrame {
offset: &str,
truncate: bool,
include_boundaries: bool,
closed: &str,
closed: Wrap<ClosedWindow>,
by: Vec<PyExpr>,
) -> PyLazyGroupBy {
let closed_window = match closed {
"none" => ClosedWindow::None,
"both" => ClosedWindow::Both,
"left" => ClosedWindow::Left,
"right" => ClosedWindow::Right,
_ => panic!("{}", "closed should be any of {'none', 'left', 'right'}"),
};
let closed_window = closed.0;
let by = by
.into_iter()
.map(|pyexpr| pyexpr.inner)
Expand Down
17 changes: 16 additions & 1 deletion py-polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,11 @@ pub mod utils;
use crate::conversion::{get_df, get_lf, get_pyseq, get_series, Wrap};
use crate::error::PyPolarsEr;
use crate::file::get_either_file;
use crate::prelude::{DataType, PyDataType};
use crate::prelude::{ClosedWindow, DataType, Duration, PyDataType};
use mimalloc::MiMalloc;
use polars::functions::diag_concat_df;
use polars_core::export::arrow::io::ipc::read::read_file_metadata;
use polars_core::prelude::IntoSeries;
use pyo3::types::PyDict;

#[global_allocator]
Expand Down Expand Up @@ -286,6 +287,19 @@ pub fn map_mul(
lazy::map_mul(&pyexpr, py, lambda, output_type, apply_groups)
}

// Python-facing wrapper around `polars_core::time::date_range`.
//
// `every` is a duration string handed to `Duration::parse`; `closed` has already been
// converted from its Python string form by the `Wrap<ClosedWindow>` extractor.
// (Plain `//` comments on purpose: `///` here would become the Python docstring.)
#[pyfunction]
fn py_date_range(
    start: i64,
    stop: i64,
    every: &str,
    closed: Wrap<ClosedWindow>,
    name: &str,
) -> PySeries {
    let every = Duration::parse(every);
    let closed = closed.0;
    let range = polars_core::time::date_range(start, stop, every, closed, name);
    range.into_series().into()
}

#[pymodule]
fn polars(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<PySeries>().unwrap();
Expand Down Expand Up @@ -319,5 +333,6 @@ fn polars(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(map_mul)).unwrap();
m.add_wrapped(wrap_pyfunction!(py_diag_concat_df)).unwrap();
m.add_wrapped(wrap_pyfunction!(py_datetime)).unwrap();
m.add_wrapped(wrap_pyfunction!(py_date_range)).unwrap();
Ok(())
}
11 changes: 11 additions & 0 deletions py-polars/tests/test_datelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,3 +195,14 @@ def test_truncate() -> None:
assert out.dt[-3] == stop - timedelta(hours=1)
assert out.dt[-2] == stop - timedelta(hours=1)
assert out.dt[-1] == stop


def test_date_range() -> None:
    """Check length and a few sample points of a 36-hour-step date range."""
    start = datetime(1985, 1, 1)
    stop = datetime(2015, 7, 1)
    step = timedelta(days=1, hours=12)

    result = pl.date_range(start, stop, step)

    assert len(result) == 7426
    # Spot-check the first three entries and the last one.
    expected_by_index = {
        0: datetime(1985, 1, 1),
        1: datetime(1985, 1, 2, 12, 0),
        2: datetime(1985, 1, 4, 0, 0),
        -1: datetime(2015, 6, 30, 12, 0),
    }
    for index, expected in expected_by_index.items():
        assert result.dt[index] == expected
13 changes: 1 addition & 12 deletions py-polars/tests/test_series.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import date, datetime, timedelta
from datetime import date

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -790,17 +790,6 @@ def test_trigonometry_functions() -> None:
assert np.allclose(srs_float.arctan(), np.array([0.785, 0.0, -0.785]), atol=0.01)


def test_date_range() -> None:
result = pl.date_range(
datetime(1985, 1, 1), datetime(2015, 7, 1), timedelta(days=1, hours=12)
)
assert len(result) == 7426
assert result.dt[0] == datetime(1985, 1, 1)
assert result.dt[1] == datetime(1985, 1, 2, 12, 0)
assert result.dt[2] == datetime(1985, 1, 4, 0, 0)
assert result.dt[-1] == datetime(2015, 6, 30, 12, 0)


def test_abs() -> None:
# ints
s = pl.Series([1, -2, 3, -4])
Expand Down

0 comments on commit 62bc6b2

Please sign in to comment.