Skip to content

Commit

Permalink
add date offset (#3827)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jun 28, 2022
1 parent 1cc4721 commit dc51044
Show file tree
Hide file tree
Showing 15 changed files with 177 additions and 5 deletions.
1 change: 1 addition & 0 deletions polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ describe = ["polars-core/describe"]
timezones = ["polars-core/timezones"]
string_justify = ["polars-lazy/string_justify", "polars-ops/string_justify"]
arg_where = ["polars-lazy/arg_where"]
date_offset = ["polars-lazy/date_offset"]

test = [
"lazy",
Expand Down
1 change: 1 addition & 0 deletions polars/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ dtype-duration = ["polars-core/dtype-duration", "polars-time/dtype-duration"]
dtype-categorical = ["polars-core/dtype-categorical"]
dtype-struct = ["polars-core/dtype-struct"]
object = ["polars-core/object"]
date_offset = []

true_div = []

Expand Down
13 changes: 10 additions & 3 deletions polars/polars-lazy/src/dsl/dt.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use super::*;
use polars_core::prelude::DataType::{Datetime, Duration};
use polars_time::prelude::TemporalMethods;

/// Specialized expressions for [`Series`] with dates/datetimes.
Expand Down Expand Up @@ -34,8 +33,8 @@ impl DateLikeNameSpace {
)),
},
GetOutput::map_dtype(move |dtype| match dtype {
DataType::Duration(_) => Duration(tu),
DataType::Datetime(_, tz) => Datetime(tu, tz.clone()),
DataType::Duration(_) => DataType::Duration(tu),
DataType::Datetime(_, tz) => DataType::Datetime(tu, tz.clone()),
_ => panic!("expected duration or datetime"),
}),
)
Expand Down Expand Up @@ -164,4 +163,12 @@ impl DateLikeNameSpace {
)
.with_fmt("dt.timestamp")
}

/// Shift every `Date`/`Datetime` value of this expression by the given [`Duration`].
///
/// The offset is applied through [`FunctionExpr::DateOffset`], so months and
/// leap years are taken into account.
#[cfg(feature = "date_offset")]
pub fn offset_by(self, by: Duration) -> Expr {
    let function = FunctionExpr::DateOffset(by);
    self.0.map_private(function, "dt.offset_by")
}
}
23 changes: 23 additions & 0 deletions polars/polars-lazy/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ mod is_in;
mod pow;
#[cfg(feature = "strings")]
mod strings;
#[cfg(any(feature = "temporal", feature = "date_offset"))]
mod temporal;

use super::*;
use polars_core::prelude::*;
Expand All @@ -31,6 +33,8 @@ pub enum FunctionExpr {
StringStartsWith(String),
#[cfg(feature = "strings")]
StringEndsWith(String),
#[cfg(feature = "date_offset")]
DateOffset(Duration),
}

impl FunctionExpr {
Expand All @@ -53,6 +57,8 @@ impl FunctionExpr {
})
};

let same_type = || map_dtype(&|dtype| dtype.clone());

use FunctionExpr::*;
match self {
NullCount => with_dtype(IDX_DTYPE),
Expand All @@ -67,6 +73,8 @@ impl FunctionExpr {
StringContains { .. } | StringEndsWith(_) | StringStartsWith(_) => {
with_dtype(DataType::Boolean)
}
#[cfg(feature = "date_offset")]
DateOffset(_) => same_type(),
}
}
}
Expand All @@ -88,6 +96,17 @@ macro_rules! map_with_args {
}};
}

/// Wraps `$func` in a `SpecialEq<Arc<_>>` closure over `&mut [Series]`.
///
/// The closure moves the first series out of the slice with `std::mem::take`
/// (leaving a default value in its place) and passes it by value to `$func`,
/// followed by the extra `$args`.
macro_rules! map_owned_with_args {
    ($func:path, $($args:expr),*) => {{
        let function = move |columns: &mut [Series]| {
            // Take ownership of the first input so `$func` receives it by value.
            let owned = std::mem::take(&mut columns[0]);
            $func(owned, $($args),*)
        };

        SpecialEq::new(Arc::new(function))
    }};
}

impl From<FunctionExpr> for SpecialEq<Arc<dyn SeriesUdf>> {
fn from(func: FunctionExpr) -> Self {
use FunctionExpr::*;
Expand Down Expand Up @@ -130,6 +149,10 @@ impl From<FunctionExpr> for SpecialEq<Arc<dyn SeriesUdf>> {
StringStartsWith(sub) => {
map_with_args!(strings::starts_with, &sub)
}
#[cfg(feature = "date_offset")]
DateOffset(offset) => {
map_owned_with_args!(temporal::date_offset, offset)
}
}
}
}
30 changes: 30 additions & 0 deletions polars/polars-lazy/src/dsl/function_expr/temporal.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
use super::*;

/// Adds `offset` to every value of a `Date` or `Datetime` series.
///
/// * `Date` input is cast to a millisecond `Datetime`, offset through the
///   `Datetime` branch, and cast back to `Date`.
/// * `Datetime` input is offset with the `Duration` add function that matches
///   the column's time unit; the resulting series keeps that logical type.
///
/// # Errors
///
/// Returns a `ComputeError` for any other dtype, and propagates errors from
/// the casts instead of panicking.
pub(super) fn date_offset(s: Series, offset: Duration) -> Result<Series> {
    match s.dtype().clone() {
        DataType::Date => {
            let s = s.cast(&DataType::Datetime(TimeUnit::Milliseconds, None))?;
            date_offset(s, offset)?.cast(&DataType::Date)
        }
        DataType::Datetime(tu, _) => {
            // Clone the chunked array, then really drop the input series. The
            // previous `ManuallyDrop` wrapper never released it, which leaked the
            // series and kept its buffers shared.
            // NOTE(review): presumably `apply_mut` can only mutate in place when
            // the buffers are not shared -- confirm against its implementation.
            let mut ca = s.datetime()?.clone();
            drop(s);

            let adder = match tu {
                TimeUnit::Nanoseconds => Duration::add_ns,
                TimeUnit::Microseconds => Duration::add_us,
                TimeUnit::Milliseconds => Duration::add_ms,
            };
            ca.0.apply_mut(|v| adder(&offset, v));
            Ok(ca.into_series())
        }
        dt => Err(PolarsError::ComputeError(
            format!("cannot use 'date_offset' on Series of dtype: {:?}", dt).into(),
        )),
    }
}
6 changes: 4 additions & 2 deletions polars/polars-time/src/windows/duration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use std::ops::Mul;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

#[derive(Copy, Clone, Debug)]
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Duration {
// the number of months for the duration
Expand Down Expand Up @@ -60,12 +60,14 @@ impl Duration {
let mut months = 0;
let mut iter = duration.char_indices();
let negative = duration.starts_with('-');
let mut start = 0;

// skip the '-' char
if negative {
start += 1;
iter.next().unwrap();
}

let mut start = 0;
let mut parsed_int = false;

let mut unit = String::with_capacity(2);
Expand Down
1 change: 1 addition & 0 deletions polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@
//! - `list_eval` - Apply expressions over list elements.
//! - `cumulative_eval` - Apply expressions over cumulatively increasing windows.
//! - `argwhere` Get indices where condition holds.
//! - `date_offset` Add an offset to dates that takes months and leap years into account.
//! * `DataFrame` pretty printing
//! - `fmt` - Activate DataFrame formatting
//!
Expand Down
1 change: 1 addition & 0 deletions py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ features = [
"string_justify",
"arg_where",
"timezones",
"date_offset",
]

# [patch.crates-io]
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/expression.rst
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ The following methods are available under the `expr.dt` attribute.
ExprDateTimeNameSpace.nanosecond
ExprDateTimeNameSpace.nanoseconds
ExprDateTimeNameSpace.ordinal_day
ExprDateTimeNameSpace.offset_by
ExprDateTimeNameSpace.quarter
ExprDateTimeNameSpace.second
ExprDateTimeNameSpace.seconds
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ The following methods are available under the `Series.dt` attribute.
DateTimeNameSpace.nanosecond
DateTimeNameSpace.nanoseconds
DateTimeNameSpace.ordinal_day
DateTimeNameSpace.offset_by
DateTimeNameSpace.quarter
DateTimeNameSpace.second
DateTimeNameSpace.seconds
Expand Down
30 changes: 30 additions & 0 deletions py-polars/polars/internals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5379,6 +5379,36 @@ def nanoseconds(self) -> Expr:
"""
return wrap_expr(self._pyexpr.duration_nanoseconds())

def offset_by(self, by: str) -> Expr:
    """
    Shift each date/datetime value by a relative time offset.

    Unlike ``pl.col("foo") + timedelta``, the offset can take months and
    leap years into account.

    Parameters
    ----------
    by
        Offset written in the following string language:

        - 1ns (1 nanosecond)
        - 1us (1 microsecond)
        - 1ms (1 millisecond)
        - 1s (1 second)
        - 1m (1 minute)
        - 1h (1 hour)
        - 1d (1 day)
        - 1w (1 week)
        - 1mo (1 calendar month)
        - 1y (1 calendar year)
        - 1i (1 index count)

    Returns
    -------
    Date/Datetime expression

    """
    shifted = self._pyexpr.dt_offset_by(by)
    return wrap_expr(shifted)


def expr_to_lit_or_expr(
expr: Union[
Expand Down
1 change: 1 addition & 0 deletions py-polars/polars/internals/lazy_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1104,6 +1104,7 @@ def argsort_by(


def duration(
*,
days: Optional[Union["pli.Expr", str]] = None,
seconds: Optional[Union["pli.Expr", str]] = None,
nanoseconds: Optional[Union["pli.Expr", str]] = None,
Expand Down
30 changes: 30 additions & 0 deletions py-polars/polars/internals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5335,6 +5335,36 @@ def nanoseconds(self) -> Series:
"""
return pli.select(pli.lit(wrap_s(self._s)).dt.nanoseconds()).to_series()

def offset_by(self, by: str) -> Series:
    """
    Shift each date/datetime value by a relative time offset.

    Unlike ``pl.col("foo") + timedelta``, the offset can take months and
    leap years into account.

    Parameters
    ----------
    by
        Offset written in the following string language:

        - 1ns (1 nanosecond)
        - 1us (1 microsecond)
        - 1ms (1 millisecond)
        - 1s (1 second)
        - 1m (1 minute)
        - 1h (1 hour)
        - 1d (1 day)
        - 1w (1 week)
        - 1mo (1 calendar month)
        - 1y (1 calendar year)
        - 1i (1 index count)

    Returns
    -------
    Date/Datetime Series

    """
    # Evaluate the expression-level `offset_by` on this series wrapped as a literal.
    as_literal = pli.lit(wrap_s(self._s))
    shifted = as_literal.dt.offset_by(by)
    return pli.select(shifted).to_series()


class CatNameSpace:
"""
Expand Down
6 changes: 6 additions & 0 deletions py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,12 @@ impl PyExpr {
pub fn timestamp(&self, tu: Wrap<TimeUnit>) -> PyExpr {
self.inner.clone().dt().timestamp(tu.0).into()
}

/// Parses `by` into a `Duration` and returns a new expression that offsets
/// this expression's dates/datetimes by it.
pub fn dt_offset_by(&self, by: &str) -> PyExpr {
    let offset = Duration::parse(by);
    let expr = self.inner.clone();
    expr.dt().offset_by(offset).into()
}

pub fn dt_epoch_seconds(&self) -> PyExpr {
self.clone()
.inner
Expand Down
37 changes: 37 additions & 0 deletions py-polars/tests/test_datelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1157,3 +1157,40 @@ def test_quarter() -> None:
assert pl.date_range(
datetime(2022, 1, 1), datetime(2022, 12, 1), "1mo"
).dt.quarter().to_list() == [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4]


def test_date_offset() -> None:
    """Check ``dt.offset_by`` with year and month offsets on yearly dates."""
    yearly = pl.date_range(datetime(2000, 1, 1), datetime(2020, 1, 1), "1y")
    out = pl.DataFrame({"dates": yearly}).with_columns(
        [
            pl.col("dates").dt.offset_by("1y").alias("date_plus_1y"),
            pl.col("dates").dt.offset_by("-1y2mo").alias("date_min"),
        ]
    )

    # Both offset columns must still fall on the first day of a month.
    assert (out["date_plus_1y"].dt.day() == 1).all()
    assert (out["date_min"].dt.day() == 1).all()

    # 2000-01-01 .. 2020-01-01 moved back by one year and two months.
    expected = [datetime(year, 11, 1, 0, 0) for year in range(1998, 2019)]
    assert out["date_min"].to_list() == expected

0 comments on commit dc51044

Please sign in to comment.