Skip to content

Commit

Permalink
python add units to timestamp and epoch (#2821)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Mar 3, 2022
1 parent 854a430 commit 41e4adc
Show file tree
Hide file tree
Showing 10 changed files with 117 additions and 50 deletions.
29 changes: 4 additions & 25 deletions polars/polars-core/src/series/series_trait.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1004,31 +1004,10 @@ pub trait SeriesTrait:
feature = "dtype-datetime"
))]
#[cfg_attr(docsrs, doc(cfg(feature = "temporal")))]
/// Convert date(time) object to timestamp in ms.
fn timestamp(&self) -> Result<Int64Chunked> {
match self.dtype() {
DataType::Date => self
.cast(&DataType::Datetime(TimeUnit::Milliseconds, None))
.unwrap()
.datetime()
.map(|ca| (ca.deref().clone())),
DataType::Datetime(tu, tz) => {
use TimeUnit::*;
match (tu, tz.as_deref()) {
(Nanoseconds, None | Some("")) => {
self.datetime().map(|ca| ca.deref().clone() / 1_000_000)
}
(Microseconds, None | Some("")) => {
self.datetime().map(|ca| ca.deref().clone() / 1_000)
}
(Milliseconds, None | Some("")) => self.datetime().map(|ca| ca.deref().clone()),
(_, Some(_)) => panic!("tz not yet supported"),
}
}
_ => Err(PolarsError::InvalidOperation(
format!("operation not supported on dtype {:?}", self.dtype()).into(),
)),
}
/// Convert a date(time) object to a timestamp in the given [`TimeUnit`].
///
/// The series is cast to a `Datetime` with time unit `tu` and no time zone,
/// and the underlying `Int64` values of that datetime array are returned.
///
/// # Errors
///
/// Returns the error produced by the cast when the series cannot be
/// converted to `Datetime`, or by the datetime accessor if the cast result
/// is unexpectedly not a datetime series (propagated instead of panicking).
fn timestamp(&self, tu: TimeUnit) -> Result<Int64Chunked> {
    let as_datetime = self.cast(&DataType::Datetime(tu, None))?;
    // Keep only the physical Int64 representation of the datetime array.
    as_datetime.datetime().map(|ca| ca.deref().clone())
}

/// Clone inner ChunkedArray and wrap in a new Arc
Expand Down
9 changes: 9 additions & 0 deletions polars/polars-lazy/src/dsl/dt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,4 +144,13 @@ impl DateLikeNameSpace {
.map(function, GetOutput::from_type(DataType::UInt32))
.with_fmt("nanosecond")
}

/// Build an expression that converts a date(time) column to its timestamp
/// in the time unit `tu`.
///
/// The resulting expression is declared to produce `Int64` and is
/// formatted as `"timestamp"`.
pub fn timestamp(self, tu: TimeUnit) -> Expr {
    let output_type = GetOutput::from_type(DataType::Int64);
    self.0
        .map(
            move |s| {
                let ca = s.timestamp(tu)?;
                Ok(ca.into_series())
            },
            output_type,
        )
        .with_fmt("timestamp")
}
}
2 changes: 1 addition & 1 deletion py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ mimalloc = { version = "*", default-features = false }
ndarray = "0.15"
numpy = "0.15"
polars-core = { path = "../polars/polars-core", default-features = false }
pyo3 = { version = "0.15", features = ["abi3-py36", "extension-module", "multiple-pymethods"] }
pyo3 = { version = "0.15", features = ["abi3-py37", "extension-module", "multiple-pymethods"] }
serde_json = { version = "1", optional = true }
thiserror = "^1.0"

Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/expression.rst
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ The following methods are available under the `expr.dt` attribute.
ExprDateTimeNameSpace.to_python_datetime
ExprDateTimeNameSpace.timestamp
ExprDateTimeNameSpace.truncate
ExprDateTimeNameSpace.epoch
ExprDateTimeNameSpace.epoch_days
ExprDateTimeNameSpace.epoch_milliseconds
ExprDateTimeNameSpace.epoch_seconds
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ The following methods are available under the `Series.dt` attribute.
DateTimeNameSpace.median
DateTimeNameSpace.mean
DateTimeNameSpace.truncate
DateTimeNameSpace.epoch
DateTimeNameSpace.epoch_days
DateTimeNameSpace.epoch_milliseconds
DateTimeNameSpace.epoch_seconds
Expand Down
44 changes: 39 additions & 5 deletions py-polars/polars/internals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -3867,11 +3867,32 @@ def to_python_datetime(self) -> Expr:
lambda s: s.dt.to_python_datetime(), return_dtype=Object
)

def epoch(self, tu: str = "us") -> Expr:
    """
    Get the time passed since the Unix EPOCH in the given time unit.

    Parameters
    ----------
    tu
        One of {'ns', 'us', 'ms', 's', 'd'}

    Raises
    ------
    ValueError
        If `tu` is not one of the supported time units.
    """
    # Sub-second units are served directly by `timestamp`.
    if tu in ("ns", "us", "ms"):
        return self.timestamp(tu)
    elif tu == "s":
        return wrap_expr(self._pyexpr.dt_epoch_seconds())
    elif tu == "d":
        # Days are obtained by casting to Date and then to its Int32 form.
        return wrap_expr(self._pyexpr).cast(Date).cast(Int32)
    raise ValueError(f"time unit {tu} not understood")

def epoch_days(self) -> Expr:
"""
Get the number of days since the unix EPOCH.
If the date is before the unix EPOCH, the number of days will be negative.
.. deprecated:: 0.13.9
Use :func:`epoch` instead.
Returns
-------
Days as Int32
Expand All @@ -3883,26 +3904,39 @@ def epoch_milliseconds(self) -> Expr:
Get the number of milliseconds since the unix EPOCH
If the date is before the unix EPOCH, the number of milliseconds will be negative.
.. deprecated:: 0.13.9
Use :func:`epoch` instead.
Returns
-------
Milliseconds as Int64
"""
return self.timestamp()
return self.timestamp("ms")

def epoch_seconds(self) -> Expr:
    """
    Get the number of seconds since the unix EPOCH.

    If the date is before the unix EPOCH, the number of seconds will be negative.

    .. deprecated:: 0.13.9
        Use :func:`epoch` instead.

    Returns
    -------
    Seconds as Int64
    """
    seconds_expr = self._pyexpr.dt_epoch_seconds()
    return wrap_expr(seconds_expr)

def timestamp(self) -> Expr:
"""Return timestamp in milliseconds as Int64 type."""
return wrap_expr(self._pyexpr.timestamp())
def timestamp(self, tu: str = "us") -> Expr:
    """
    Return a timestamp in the given time unit.

    Parameters
    ----------
    tu
        One of {'ns', 'us', 'ms'}
    """
    timestamp_expr = self._pyexpr.timestamp(tu)
    return wrap_expr(timestamp_expr)

def with_time_unit(self, tu: str) -> Expr:
"""
Expand All @@ -3912,7 +3946,7 @@ def with_time_unit(self, tu: str) -> Expr:
Parameters
----------
tu
Time unit for the `Datetime` Series: any of {"ns", "us", "ms"}
Time unit for the `Datetime` Series: one of {"ns", "us", "ms"}
"""
return wrap_expr(self._pyexpr.dt_with_time_unit(tu))

Expand Down
42 changes: 37 additions & 5 deletions py-polars/polars/internals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4435,17 +4435,22 @@ def nanosecond(self) -> Series:
"""
return wrap_s(self._s.nanosecond())

def timestamp(self) -> Series:
def timestamp(self, tu: str = "us") -> Series:
"""
Return timestamp in ms as Int64 type.
Return a timestamp in the given time unit.
Parameters
----------
tu
One of {'ns', 'us', 'ms'}
"""
return wrap_s(self._s.timestamp())
return wrap_s(self._s.timestamp(tu))

def to_python_datetime(self) -> Series:
"""
Go from Date/Datetime to python DateTime objects
"""
return (self.timestamp() / 1000).apply(
return (self.timestamp("ms") / 1000).apply(
lambda ts: datetime.utcfromtimestamp(ts), Object
)

Expand Down Expand Up @@ -4481,11 +4486,32 @@ def mean(self) -> Union[date, datetime]:
out = int(s.mean())
return _to_python_datetime(out, s.dtype, s.time_unit)

def epoch(self, tu: str = "us") -> Series:
    """
    Get the time passed since the Unix EPOCH in the given time unit.

    Parameters
    ----------
    tu
        One of {'ns', 'us', 'ms', 's', 'd'}

    Raises
    ------
    ValueError
        If `tu` is not one of the supported time units.
    """
    # Sub-second units are served directly by `timestamp`.
    if tu in ("ns", "us", "ms"):
        return self.timestamp(tu)
    elif tu == "s":
        return wrap_s(self._s.dt_epoch_seconds())
    elif tu == "d":
        # Days are obtained by casting to Date and then to its Int32 form.
        return wrap_s(self._s).cast(Date).cast(Int32)
    raise ValueError(f"time unit {tu} not understood")

def epoch_days(self) -> Series:
"""
Get the number of days since the unix EPOCH.
If the date is before the unix EPOCH, the number of days will be negative.
.. deprecated:: 0.13.9
Use :func:`epoch` instead.
Returns
-------
Days as Int32
Expand All @@ -4497,17 +4523,23 @@ def epoch_milliseconds(self) -> Series:
Get the number of milliseconds since the unix EPOCH
If the date is before the unix EPOCH, the number of milliseconds will be negative.
.. deprecated:: 0.13.9
Use :func:`epoch` instead.
Returns
-------
Milliseconds as Int64
"""
return self.timestamp()
return self.timestamp("ms")

def epoch_seconds(self) -> Series:
"""
Get the number of seconds since the unix EPOCH
If the date is before the unix EPOCH, the number of seconds will be negative.
.. deprecated:: 0.13.9
Use :func:`epoch` instead.
Returns
-------
Seconds as Int64
Expand Down
16 changes: 6 additions & 10 deletions py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -671,21 +671,17 @@ impl PyExpr {
)
.into()
}
pub fn timestamp(&self) -> PyExpr {
self.clone()
.inner
.map(
|s| s.timestamp().map(|ca| ca.into_series()),
GetOutput::from_type(DataType::Int64),
)
.with_fmt("timestamp")
.into()
/// Build the `dt().timestamp(..)` expression for this expression, using the
/// time unit carried by `tu`.
pub fn timestamp(&self, tu: Wrap<TimeUnit>) -> PyExpr {
    let time_unit = tu.0;
    let expr = self.inner.clone();
    expr.dt().timestamp(time_unit).into()
}
pub fn dt_epoch_seconds(&self) -> PyExpr {
self.clone()
.inner
.map(
|s| s.timestamp().map(|ca| (ca / 1000).into_series()),
|s| {
s.timestamp(TimeUnit::Milliseconds)
.map(|ca| (ca / 1000).into_series())
},
GetOutput::from_type(DataType::Int64),
)
.into()
Expand Down
9 changes: 6 additions & 3 deletions py-polars/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1095,8 +1095,8 @@ impl PySeries {
Ok(PySeries::new(s))
}

pub fn timestamp(&self) -> PyResult<Self> {
let ca = self.series.timestamp().map_err(PyPolarsEr::from)?;
/// Return this series' timestamp in the time unit carried by `tu` as a new
/// series; a failure from the underlying `timestamp` call is converted
/// through `PyPolarsEr` into the returned error.
pub fn timestamp(&self, tu: Wrap<TimeUnit>) -> PyResult<Self> {
    match self.series.timestamp(tu.0) {
        Ok(ca) => Ok(ca.into_series().into()),
        Err(err) => Err(PyPolarsEr::from(err).into()),
    }
}

Expand Down Expand Up @@ -1319,7 +1319,10 @@ impl PySeries {
}

pub fn dt_epoch_seconds(&self) -> PyResult<Self> {
let ms = self.series.timestamp().map_err(PyPolarsEr::from)?;
let ms = self
.series
.timestamp(TimeUnit::Milliseconds)
.map_err(PyPolarsEr::from)?;
Ok((ms / 1000).into_series().into())
}

Expand Down
14 changes: 13 additions & 1 deletion py-polars/tests/test_datelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def test_diff_datetime() -> None:

def test_timestamp() -> None:
a = pl.Series("a", [a * 1000_000 for a in [10000, 20000, 30000]], dtype=pl.Datetime)
assert a.dt.timestamp() == [10000, 20000, 30000]
assert a.dt.timestamp("ms") == [10000, 20000, 30000]
out = a.dt.to_python_datetime()
assert isinstance(out[0], datetime)
assert a.dt.min() == out[0]
Expand Down Expand Up @@ -527,3 +527,15 @@ def test_read_utc_times_parquet() -> None:
f.seek(0)
df_in = pl.read_parquet(f)
assert df_in["Timestamp"][0] == datetime(2022, 1, 1, 0, 0)


def test_epoch() -> None:
    """Check `dt.epoch` against values derived from `dt.timestamp` for each unit."""
    dates = pl.Series(
        "dates",
        [datetime(2001, 1, 1), datetime(2001, 2, 1, 10, 8, 9)],
    )

    # For 'ns', 'us' and 'ms' the epoch must equal the timestamp in that unit.
    for unit in ("ns", "us", "ms"):
        expected = dates.dt.timestamp(unit)
        assert dates.dt.epoch(unit).series_equal(expected)

    # Expected seconds and days are floor-divided from the millisecond timestamp.
    millis = dates.dt.timestamp("ms")
    ms_per_second = 1000
    ms_per_day = ms_per_second * 3600 * 24
    assert dates.dt.epoch("s").series_equal(millis // ms_per_second)
    assert dates.dt.epoch("d").series_equal((millis // ms_per_day).cast(pl.Int32))

0 comments on commit 41e4adc

Please sign in to comment.