Skip to content

Commit

Permalink
feat(python): pl.from_epoch function (#5330)
Browse files Browse the repository at this point in the history
  • Loading branch information
YuRiTan committed Nov 1, 2022
1 parent e0a1bdb commit 7c3fb74
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 1 deletion.
2 changes: 2 additions & 0 deletions py-polars/polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ def version() -> str:
first,
fold,
format,
from_epoch,
groups,
head,
last,
Expand Down Expand Up @@ -270,6 +271,7 @@ def version() -> str:
"collect_all",
"exclude",
"format",
"from_epoch",
"datetime", # named _datetime, see import above
"date", # name _date, see import above
"list", # named to_list, see import above
Expand Down
2 changes: 2 additions & 0 deletions py-polars/polars/internals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
count,
element,
format,
from_epoch,
lit,
select,
struct,
Expand All @@ -63,6 +64,7 @@
"element",
"expr_to_lit_or_expr",
"format",
"from_epoch",
"lit",
"read_ipc_schema",
"read_parquet_schema",
Expand Down
122 changes: 122 additions & 0 deletions py-polars/polars/internals/lazy_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@

from polars import internals as pli
from polars.datatypes import (
DTYPE_TEMPORAL_UNITS,
DataType,
Date,
Datetime,
Duration,
Int64,
PolarsDataType,
Time,
UInt32,
Expand All @@ -19,6 +21,7 @@
)
from polars.dependencies import _NUMPY_TYPE
from polars.dependencies import numpy as np
from polars.internals.type_aliases import EpochTimeUnit
from polars.utils import (
_datetime_to_pl_timestamp,
_time_to_pl_time,
Expand Down Expand Up @@ -2119,3 +2122,122 @@ def coalesce(
"""
exprs = pli.selection_to_pyexpr_list(exprs)
return pli.wrap_expr(_coalesce_exprs(exprs))


# Typing overloads for `from_epoch`. They let a type checker derive the return
# type (Expr vs. Series) from the input type and the `eager` keyword.

# `eager=False` passed explicitly: typed as returning an expression.
# NOTE(review): for a Series input the implementation returns the result of
# `Series.cast(...)` when `eager` is falsy, which looks like a Series rather
# than an Expr - confirm whether `pli.Series` belongs in this overload.
@overload
def from_epoch(
    column: str | pli.Expr | pli.Series,
    unit: EpochTimeUnit = ...,
    *,
    eager: Literal[False],
) -> pli.Expr:
    ...


# `eager=True` passed explicitly: typed as returning a Series.
@overload
def from_epoch(
    column: str | pli.Expr | pli.Series | Sequence[int],
    unit: EpochTimeUnit = ...,
    *,
    eager: Literal[True],
) -> pli.Series:
    ...


# Series or plain sequence input with `eager` omitted (or True): a Series.
@overload
def from_epoch(
    column: pli.Series | Sequence[int],
    unit: EpochTimeUnit = ...,
    *,
    eager: Literal[True] = ...,
) -> pli.Series:
    ...


# Column name or expression input with `eager` omitted (or False): an Expr.
@overload
def from_epoch(
    column: str | pli.Expr,
    unit: EpochTimeUnit = ...,
    *,
    eager: Literal[False] = ...,
) -> pli.Expr:
    ...


# Fallback when `eager` is a plain `bool` that is not statically known:
# the result may be either an Expr or a Series.
@overload
def from_epoch(
    column: str | pli.Expr | pli.Series | Sequence[int],
    unit: EpochTimeUnit = ...,
    *,
    eager: bool = ...,
) -> pli.Expr | pli.Series:
    ...


def from_epoch(
    column: str | pli.Expr | pli.Series | Sequence[int],
    unit: EpochTimeUnit = "s",
    *,
    eager: bool = False,
) -> pli.Expr | pli.Series:
    """
    Convert an epoch timestamp (Unix time) into a Polars Date or Datetime.

    The dtype of the result is determined by `unit`:

    - unit="d" returns pl.Date
    - unit="s" returns pl.Datetime["us"] (pl.Datetime's default)
    - unit="ms" returns pl.Datetime["ms"]
    - unit="us" returns pl.Datetime["us"]
    - unit="ns" returns pl.Datetime["ns"]

    Parameters
    ----------
    column
        Epoch values to convert. A string is interpreted as a column name, a
        Series or expression is used as given, and any other input is passed
        to the Series constructor.
    unit
        The unit of the timesteps since epoch time.
    eager
        If `True`, the input must be a Series (or a sequence) and the
        conversion is evaluated immediately, returning a Series.

    Raises
    ------
    ValueError
        If `unit` is not one of the supported units, or if `eager=True` is
        combined with a column name or expression.

    Examples
    --------
    >>> df = pl.DataFrame({"timestamp": [1666683077, 1666683099]}).lazy()
    >>> df.select(pl.from_epoch(pl.col("timestamp"), unit="s")).collect()
    shape: (2, 1)
    ┌─────────────────────┐
    │ timestamp           │
    │ ---                 │
    │ datetime[μs]        │
    ╞═════════════════════╡
    │ 2022-10-25 07:31:17 │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 2022-10-25 07:31:39 │
    └─────────────────────┘

    """
    # Normalise the input to either an expression or a Series.
    source: pli.Expr | pli.Series
    if isinstance(column, str):
        source = col(column)
    elif isinstance(column, (pli.Series, pli.Expr)):
        source = column
    else:
        source = pli.Series(column)  # Sequence input handled by Series constructor

    # Build the conversion for the requested unit.
    if unit == "d":
        converted = source.cast(Date)
    elif unit == "s":
        # Seconds are scaled to microseconds before casting to Datetime("us").
        as_microseconds = source.cast(Int64) * 1_000_000
        converted = as_microseconds.cast(Datetime("us"))
    elif unit in DTYPE_TEMPORAL_UNITS:
        converted = source.cast(Datetime(unit))
    else:
        raise ValueError(
            f"'unit' must be one of {{'ns', 'us', 'ms', 's', 'd'}}, got '{unit}'."
        )

    if not eager:
        return converted

    # Eager evaluation needs concrete data, i.e. a Series.
    if isinstance(source, pli.Series):
        return source.to_frame().select(converted).to_series()

    raise ValueError(
        "expected 'Series or Sequence' in 'from_epoch' if 'eager=True', got"
        f" {type(source)}"
    )
2 changes: 1 addition & 1 deletion py-polars/polars/internals/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def __init__(
elif _PANDAS_TYPE(values) and isinstance(values, (pd.Series, pd.DatetimeIndex)):
self._s = pandas_to_pyseries(name, values)
else:
raise ValueError("Series constructor not called properly.")
raise ValueError(f"Series constructor not called properly. Got {values}.")

@classmethod
def _from_pyseries(cls, pyseries: PySeries) -> Series:
Expand Down
50 changes: 50 additions & 0 deletions py-polars/tests/unit/test_lazy.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import os
from datetime import date, datetime
from functools import reduce
from operator import add
from string import ascii_letters
Expand All @@ -12,7 +13,9 @@

import polars as pl
from polars import col, lit, when
from polars.datatypes import PolarsDataType
from polars.testing import assert_frame_equal
from polars.testing.asserts import assert_series_equal


def test_lazy() -> None:
Expand Down Expand Up @@ -1498,3 +1501,50 @@ def test_quadratic_behavior_4736() -> None:
# our tests it has passed.
df = pl.DataFrame(columns=list(ascii_letters))
df.lazy().select(reduce(add, (pl.col(fld) for fld in df.columns)))


@pytest.mark.parametrize("input_dtype", [pl.Utf8, pl.Int64, pl.Float64])
def test_from_epoch(input_dtype: PolarsDataType) -> None:
    # The same instant expressed in every supported epoch unit.
    epoch_seconds = 1147880044
    epoch_values = {
        "timestamp_d": 13285,
        "timestamp_s": epoch_seconds,
        "timestamp_ms": epoch_seconds * 1_000,
        "timestamp_us": epoch_seconds * 1_000_000,
        "timestamp_ns": epoch_seconds * 1_000_000_000,
    }
    ldf = pl.DataFrame(
        [
            pl.Series(name, [value]).cast(input_dtype)
            for name, value in epoch_values.items()
        ]
    ).lazy()

    exp_dt = datetime(2006, 5, 17, 15, 34, 4)
    expected_columns = [
        pl.Series("timestamp_d", [date(2006, 5, 17)]),
        pl.Series("timestamp_s", [exp_dt]),  # s is no Polars dtype, defaults to us
        pl.Series("timestamp_ms", [exp_dt]).cast(pl.Datetime("ms")),
        pl.Series("timestamp_us", [exp_dt]),  # us is Polars Datetime default
        pl.Series("timestamp_ns", [exp_dt]).cast(pl.Datetime("ns")),
    ]
    expected = pl.DataFrame(expected_columns)

    conversions = [
        pl.from_epoch(pl.col("timestamp_d"), unit="d"),
        pl.from_epoch(pl.col("timestamp_s"), unit="s"),
        pl.from_epoch(pl.col("timestamp_ms"), unit="ms"),
        pl.from_epoch(pl.col("timestamp_us"), unit="us"),
        pl.from_epoch(pl.col("timestamp_ns"), unit="ns"),
    ]
    ldf_result = ldf.select(conversions).collect()

    assert_frame_equal(ldf_result, expected)

    # An unsupported unit must be rejected.
    ts_col = pl.col("timestamp_s")
    with pytest.raises(ValueError):
        _ = ldf.select(pl.from_epoch(ts_col, unit="s2"))  # type: ignore[call-overload]


def test_from_epoch_seq_input() -> None:
    # A plain Python sequence is accepted; the default unit is seconds.
    result = pl.from_epoch([1147880044])
    expected = pl.Series([datetime(2006, 5, 17, 15, 34, 4)])
    assert_series_equal(result, expected)
32 changes: 32 additions & 0 deletions py-polars/tests/unit/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@
Float64,
Int32,
Int64,
PolarsDataType,
Time,
UInt32,
UInt64,
)
from polars.internals.type_aliases import EpochTimeUnit
from polars.testing import assert_frame_equal, assert_series_equal
from polars.testing._private import verify_series_and_expr_api

Expand Down Expand Up @@ -2310,3 +2312,33 @@ class XSeries(pl.Series):
def test_builtin_abs() -> None:
    # The builtin abs() works on a Series and leaves nulls untouched.
    signed = pl.Series("s", [-1, 0, 1, None])
    absolute = abs(signed)
    assert absolute.to_list() == [1, 0, 1, None]


@pytest.mark.parametrize(
    "value, unit, exp, exp_type",
    [
        (13285, "d", date(2006, 5, 17), pl.Date),
        (1147880044, "s", datetime(2006, 5, 17, 15, 34, 4), pl.Datetime),
        (1147880044 * 1_000, "ms", datetime(2006, 5, 17, 15, 34, 4), pl.Datetime("ms")),
        (
            1147880044 * 1_000_000,
            "us",
            datetime(2006, 5, 17, 15, 34, 4),
            pl.Datetime("us"),
        ),
        (
            1147880044 * 1_000_000_000,
            "ns",
            datetime(2006, 5, 17, 15, 34, 4),
            pl.Datetime("ns"),
        ),
    ],
)
def test_from_epoch_expr(
    value: int, unit: EpochTimeUnit, exp: date | datetime, exp_type: PolarsDataType
) -> None:
    # Series input: each unit maps to its expected dtype and nulls are preserved.
    epochs = pl.Series("timestamp", [value, None])
    converted = pl.from_epoch(epochs, unit=unit)

    wanted = pl.Series("timestamp", [exp, None]).cast(exp_type)
    assert_series_equal(converted, wanted)

0 comments on commit 7c3fb74

Please sign in to comment.