Skip to content

Commit

Permalink
Fix datetime consistency errors in filtering/cast ops (#4346)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Aug 9, 2022
1 parent 0177350 commit 7af0082
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 33 deletions.
32 changes: 17 additions & 15 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
.idea/
.vscode/
target/
Cargo.lock
data/
__pycache__/
.ipynb_checkpoints/
polars/vendor
*.iml
*.so
.DS_Store
.ENV
.coverage
.env
.hypothesis/
.idea/
.ipynb_checkpoints/
.mypy_cache/
.pytest_cache/
.python-version
.vscode/
__pycache__/
AUTO_CHANGELOG.md
node_modules/
.coverage
.hypothesis
venv/
*.iml
Cargo.lock
coverage.lcov
coverage.xml
.DS_Store
.python-version
data/
node_modules/
polars/vendor
target/
venv/
10 changes: 5 additions & 5 deletions py-polars/polars/internals/lazy_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from polars.utils import (
_datetime_to_pl_timestamp,
_timedelta_to_pl_timedelta,
in_nanoseconds_window,
timedelta_in_nanoseconds_window,
)

Expand Down Expand Up @@ -642,16 +641,15 @@ def lit(value: Any, dtype: type[DataType] | None = None) -> pli.Expr:
"""
if isinstance(value, datetime):
if in_nanoseconds_window(value):
tu = "ns"
else:
tu = "ms"
tu = "us"
return (
lit(_datetime_to_pl_timestamp(value, tu))
.cast(Datetime)
.dt.with_time_unit(tu)
)
if isinstance(value, timedelta):
# TODO: python timedelta should also default to 'us' units.
# (needs some corresponding work on the Rust side first)
if timedelta_in_nanoseconds_window(value):
tu = "ns"
else:
Expand Down Expand Up @@ -684,6 +682,8 @@ def lit(value: Any, dtype: type[DataType] | None = None) -> pli.Expr:
item = value.item()
except AttributeError:
item = value
if isinstance(item, datetime):
return lit(item)
return pli.wrap_expr(pylit(item))


Expand Down
6 changes: 2 additions & 4 deletions py-polars/polars/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,8 @@ def _timedelta_to_pl_timedelta(td: timedelta, tu: str | None = None) -> int:
elif tu == "ms":
return int(td.total_seconds() * 1e3)
if tu is None:
if timedelta_in_nanoseconds_window(td):
return int(td.total_seconds() * 1e9)
else:
return int(td.total_seconds() * 1e3)
# Python's timedelta has microsecond ("us") precision
return int(td.total_seconds() * 1e6)
else:
raise ValueError("expected one of {'ns', 'us', 'ms'}")

Expand Down
44 changes: 36 additions & 8 deletions py-polars/tests/test_datelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,38 @@ def test_from_numpy() -> None:


def test_datetime_consistency() -> None:
# dt = datetime(2021, 1, 1, 10, 30, 45, 123456)
dt = datetime(2021, 1, 1, 10, 30, 45, 123000)
dt = datetime(2022, 7, 5, 10, 30, 45, 123455)
df = pl.DataFrame({"date": [dt]})

assert df["date"].dt[0] == dt
assert df.select(pl.lit(dt))["literal"].dt[0] == dt
assert df.filter(pl.col("date") == dt).rows() == [(dt,)]

ddf = df.select(
[
pl.col("date"),
pl.lit(dt).alias("dt"),
pl.lit(dt).cast(pl.Datetime("ms")).alias("dt_ms"),
pl.lit(dt).cast(pl.Datetime("us")).alias("dt_us"),
pl.lit(dt).cast(pl.Datetime("ns")).alias("dt_ns"),
]
)
assert ddf.schema == {
"date": pl.Datetime("us"),
"dt": pl.Datetime("us"),
"dt_ms": pl.Datetime("ms"),
"dt_us": pl.Datetime("us"),
"dt_ns": pl.Datetime("ns"),
}
assert ddf.select([pl.col(c).cast(int) for c in ddf.schema]).rows() == [
(
1657017045123455,
1657017045123455,
1657017045123,
1657017045123455,
1657017045123455000,
)
]


def test_timezone() -> None:
Expand Down Expand Up @@ -976,21 +1003,22 @@ def test_datetime_units() -> None:


def test_datetime_instance_selection() -> None:
test_data = {
"ns": [datetime(2022, 12, 31, 1, 2, 3)],
"us": [datetime(2022, 12, 31, 4, 5, 6)],
"ms": [datetime(2022, 12, 31, 7, 8, 9)],
}
df = pl.DataFrame(
data={
"ns": [datetime(2022, 12, 31, 1, 2, 3)],
"us": [datetime(2022, 12, 31, 4, 5, 6)],
"ms": [datetime(2022, 12, 31, 7, 8, 9)],
},
data=test_data,
columns=[
("ns", pl.Datetime("ns")),
("us", pl.Datetime("us")),
("ms", pl.Datetime("ms")),
],
)

for tu in DTYPE_TEMPORAL_UNITS:
assert df.select(pl.col([pl.Datetime(tu)])).dtypes == [pl.Datetime(tu)]
assert len(df.filter(pl.col(tu) == test_data[tu][0])) == 1


def test_unique_counts_on_dates() -> None:
Expand Down
4 changes: 3 additions & 1 deletion py-polars/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,12 @@ def test_date_to_pl_date() -> None:
def test_timedelta_to_pl_timedelta() -> None:
out = _timedelta_to_pl_timedelta(timedelta(days=1), "ns")
assert out == 86_400_000_000_000
out = _timedelta_to_pl_timedelta(timedelta(days=1), "us")
assert out == 86_400_000_000
out = _timedelta_to_pl_timedelta(timedelta(days=1), "ms")
assert out == 86_400_000
out = _timedelta_to_pl_timedelta(timedelta(days=1), tu=None)
assert out == 86_400_000_000_000
assert out == 86_400_000_000


def test_estimated_size() -> None:
Expand Down

0 comments on commit 7af0082

Please sign in to comment.