Skip to content

Commit

Permalink
python: refactor tests and add literal datetime.date
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Nov 13, 2021
1 parent e6260f2 commit 42e680d
Show file tree
Hide file tree
Showing 7 changed files with 171 additions and 160 deletions.
4 changes: 2 additions & 2 deletions py-polars/polars/lazy/expr.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import copy
import typing as tp
from datetime import datetime
from datetime import date, datetime
from typing import Any, Callable, Optional, Sequence, Type, Union

import numpy as np
Expand Down Expand Up @@ -2345,7 +2345,7 @@ def expr_to_lit_or_expr(
"""
if isinstance(expr, str) and not str_to_lit:
return col(expr)
elif isinstance(expr, (int, float, str, pl.Series, datetime)) or expr is None:
elif isinstance(expr, (int, float, str, pl.Series, datetime, date)) or expr is None:
return lit(expr)
elif isinstance(expr, list):
return [expr_to_lit_or_expr(e, str_to_lit=str_to_lit) for e in expr] # type: ignore[return-value]
Expand Down
4 changes: 3 additions & 1 deletion py-polars/polars/lazy/functions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import typing as tp
from datetime import datetime, timezone
from datetime import date, datetime, timezone
from inspect import isclass
from typing import Any, Callable, Optional, Type, Union

Expand Down Expand Up @@ -407,6 +407,8 @@ def lit(
return lit(int((value.replace(tzinfo=timezone.utc)).timestamp() * 1e3)).cast(
pl.Datetime
)
if isinstance(value, date):
return lit(datetime(value.year, value.month, value.day)).cast(pl.Date)

if isinstance(value, pl.Series):
name = value.name
Expand Down
149 changes: 148 additions & 1 deletion py-polars/tests/test_datelike.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import datetime
from datetime import date, datetime

import polars as pl

Expand All @@ -13,3 +13,150 @@ def test_fill_null():
assert out.null_count() == 0
assert out.dt[0] == dt
assert out.dt[1] == dt

dt1 = date(2001, 1, 1)
dt2 = date(2001, 1, 2)
dt3 = date(2001, 1, 3)
s = pl.Series("a", [dt1, dt2, dt3, None])
dt = date(2001, 1, 4)
for fill_val in (dt, pl.lit(dt)):
out = s.fill_null(fill_val)

assert out.null_count() == 0
assert out.dt[0] == dt1
assert out.dt[1] == dt2
assert out.dt[-1] == dt


def test_downsample():
    """Downsample twenty evenly spaced timestamps with ``rule="minute", n=5``.

    Checks the shape returned by ``first()`` and by a four-way aggregation of
    column ``b``, then the pandas dtype of column ``a`` after ``to_pandas()``.
    """
    # Twenty integer timestamps, each 60000 larger than the previous one.
    first_stamp = 946684800000
    spacing = 60000
    stamps = pl.Series(
        "datetime",
        [first_stamp + i * spacing for i in range(20)],
    ).cast(pl.Datetime)
    stamps_copy = stamps.clone()
    frame = pl.DataFrame({"a": stamps, "b": stamps_copy})

    firsts = frame.downsample("a", rule="minute", n=5).first()
    assert firsts.shape == (4, 2)

    # OLHC: first, min, max and last of column "b" per bucket
    aggregations = {"b": ["first", "min", "max", "last"]}
    olhc = frame.downsample("a", rule="minute", n=5).agg(aggregations)
    assert olhc.shape == (4, 5)

    # test to_pandas as well.
    as_pandas = frame.to_pandas()
    assert as_pandas["a"].dtype == "datetime64[ns]"


def test_filter_date():
    """Filter a ``pl.Date`` column against ``datetime`` and ``date`` literals.

    The string column is parsed to ``pl.Date`` and then compared with
    literals built from both ``datetime.datetime`` and ``datetime.date``
    values; each comparison must select the same rows.
    """
    dataset = pl.DataFrame(
        {"date": ["2020-01-02", "2020-01-03", "2020-01-04"], "index": [1, 2, 3]}
    )
    df = dataset.with_column(pl.col("date").str.strptime(pl.Date, "%Y-%m-%d"))

    # filter by datetime literal
    assert df.filter(pl.col("date") <= pl.lit(datetime(2019, 1, 3))).is_empty()
    assert df.filter(pl.col("date") < pl.lit(datetime(2020, 1, 4))).shape[0] == 2
    assert df.filter(pl.col("date") < pl.lit(datetime(2020, 1, 5))).shape[0] == 3

    # filter by date literal (previously this group repeated the datetime
    # assertions verbatim, so the date path was never exercised here)
    assert df.filter(pl.col("date") <= pl.lit(date(2019, 1, 3))).is_empty()
    assert df.filter(pl.col("date") < pl.lit(date(2020, 1, 4))).shape[0] == 2
    assert df.filter(pl.col("date") < pl.lit(date(2020, 1, 5))).shape[0] == 3


def test_diff_datetime():
    """Parse a string column to ``pl.Date`` and diff it over the ``char`` groups.

    The test asserts that the first two entries of the resulting
    ``timestamp`` column compare equal.
    """
    frame = pl.DataFrame(
        {
            "timestamp": ["2021-02-01", "2021-03-1", "2021-04-1"],
            "guild": [1, 2, 3],
            "char": ["a", "a", "b"],
        }
    )

    # Step 1: turn the strings into dates.
    parsed = frame.with_columns(
        [pl.col("timestamp").str.strptime(pl.Date, fmt="%Y-%m-%d")]
    )
    # Step 2: take the difference within each "char" group.
    diffed = parsed.with_columns([pl.col("timestamp").diff().over(pl.col("char"))])
    result = diffed["timestamp"]

    assert result[0] == result[1]


def test_timestamp():
    """Round-trip integer values through a ``pl.Datetime`` series.

    Covers ``dt.timestamp()``, ``dt.to_python_datetime()``, ``dt.min()`` /
    ``dt.max()`` and the type of values returned by ``DataFrame.row``.
    """
    raw_values = [10000, 20000, 30000]
    series = pl.Series("a", raw_values, dtype=pl.Datetime)
    assert series.dt.timestamp() == [10000, 20000, 30000]

    py_datetimes = series.dt.to_python_datetime()
    earliest = py_datetimes[0]
    latest = py_datetimes[2]
    assert isinstance(earliest, datetime)
    assert series.dt.min() == earliest
    assert series.dt.max() == latest

    frame = pl.DataFrame([py_datetimes])
    # test if rows returns objects
    first_row = frame.row(0)
    assert isinstance(first_row[0], datetime)


def test_from_pydatetime():
    """Build series from Python ``datetime`` and ``date`` values with a null.

    For each input list the series must get the expected polars dtype, keep
    its name, report one null and return the first input value from ``dt[0]``.
    """
    datetime_values = [
        datetime(2021, 1, 1),
        datetime(2021, 1, 2),
        datetime(2021, 1, 3),
        datetime(2021, 1, 4, 12, 12),
        None,
    ]
    date_values = [date(2021, 1, 1), date(2021, 1, 2), date(2021, 1, 3), None]

    cases = (
        (datetime_values, pl.Datetime),
        (date_values, pl.Date),
    )
    for values, expected_dtype in cases:
        series = pl.Series("name", values)
        assert series.dtype == expected_dtype
        assert series.name == "name"
        assert series.null_count() == 1
        assert series.dt[0] == values[0]
        # fmt dates and nulls
        print(series)


def test_to_python_datetime():
    """Check the result dtypes of ``dt.to_python_datetime`` and ``dt.timestamp``."""
    frame = pl.DataFrame({"a": [1, 2, 3]})

    as_objects = frame.select(pl.col("a").cast(pl.Datetime).dt.to_python_datetime())
    assert as_objects["a"].dtype == pl.Object

    as_timestamps = frame.select(pl.col("a").cast(pl.Datetime).dt.timestamp())
    assert as_timestamps["a"].dtype == pl.Int64


def test_datetime_consistency():
    """A datetime read back from a column or from a literal equals the input."""
    moment = datetime(2021, 1, 1)
    frame = pl.DataFrame({"date": [moment]})

    from_column = frame["date"].dt[0]
    assert from_column == moment

    from_literal = frame.select(pl.lit(moment))["literal"].dt[0]
    assert from_literal == moment
82 changes: 0 additions & 82 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,48 +290,6 @@ def test_from_arrow():
assert pl.from_arrow(tbl).shape == (2, 5)


def test_downsample():
    """Downsample twenty evenly spaced timestamps with ``rule="minute", n=5``.

    Asserts the shape of the ``first()`` result and of a four-way aggregation
    on column ``b``, then the pandas dtype of column ``a``.
    """
    # Twenty integer timestamps from 946684800000 to 946685940000, step 60000.
    values = list(range(946684800000, 946685940000 + 1, 60000))
    series_a = pl.Series("datetime", values).cast(pl.Datetime)
    series_b = series_a.clone()
    frame = pl.DataFrame({"a": series_a, "b": series_b})

    first_per_bucket = frame.downsample("a", rule="minute", n=5).first()
    assert first_per_bucket.shape == (4, 2)

    # OLHC
    olhc_spec = {"b": ["first", "min", "max", "last"]}
    olhc_frame = frame.downsample("a", rule="minute", n=5).agg(olhc_spec)
    assert olhc_frame.shape == (4, 5)

    # test to_pandas as well.
    pandas_frame = frame.to_pandas()
    assert pandas_frame["a"].dtype == "datetime64[ns]"


def test_sort():
df = pl.DataFrame({"a": [2, 1, 3], "b": [1, 2, 3]})
df.sort("a", in_place=True)
Expand Down Expand Up @@ -690,12 +648,6 @@ def test_df_stats(df):
df.quantile(0.4)


def test_from_pandas_datetime():
    """Smoke test: a pandas frame with a datetime column converts without error."""
    pandas_frame = pd.DataFrame(
        {"datetime": ["2021-01-01", "2021-01-02"], "foo": [1, 2]}
    )
    # Replace the string column with its parsed datetime equivalent.
    parsed_column = pd.to_datetime(pandas_frame["datetime"])
    pandas_frame["datetime"] = parsed_column
    pl.from_pandas(pandas_frame)


def test_df_fold():
df = pl.DataFrame({"a": [2, 1, 3], "b": [1, 2, 3], "c": [1.0, 2.0, 3.0]})

Expand Down Expand Up @@ -1047,19 +999,6 @@ def test_h_agg():
assert df.mean(axis=1, null_strategy="propagate")[1] is None


def test_filter_date():
    """Filter a ``pl.Date`` column against ``datetime`` literals.

    The string column is parsed to ``pl.Date``; the filters then check that
    no row is on or before 2019-01-03, two rows are before 2020-01-04 and
    all three rows are before 2020-01-05.
    """
    dataset = pl.DataFrame(
        {"date": ["2020-01-02", "2020-01-03", "2020-01-04"], "index": [1, 2, 3]}
    )
    df = dataset.with_column(pl.col("date").str.strptime(pl.Date, "%Y-%m-%d"))

    # Each comparison is asserted once; the original repeated these three
    # assertions verbatim, which added no coverage.
    assert df.filter(pl.col("date") <= pl.lit(datetime(2019, 1, 3))).is_empty()
    assert df.filter(pl.col("date") < pl.lit(datetime(2020, 1, 4))).shape[0] == 2
    assert df.filter(pl.col("date") < pl.lit(datetime(2020, 1, 5))).shape[0] == 3


def test_slicing():
# https://github.com/pola-rs/polars/issues/1322
n = 20
Expand Down Expand Up @@ -1235,27 +1174,6 @@ def test_filter_with_all_expansion():
assert out.shape == (2, 3)


def test_diff_datetime():
    """Diff a parsed ``pl.Date`` column over the ``char`` groups.

    Asserts that the first two values of the resulting ``timestamp`` column
    compare equal.
    """
    source = pl.DataFrame(
        {
            "timestamp": ["2021-02-01", "2021-03-1", "2021-04-1"],
            "guild": [1, 2, 3],
            "char": ["a", "a", "b"],
        }
    )

    parse_dates = [pl.col("timestamp").str.strptime(pl.Date, fmt="%Y-%m-%d")]
    with_dates = source.with_columns(parse_dates)

    grouped_diff = [pl.col("timestamp").diff().over(pl.col("char"))]
    timestamps = with_dates.with_columns(grouped_diff)["timestamp"]

    assert timestamps[0] == timestamps[1]


def test_diag_concat():
a = pl.DataFrame({"a": [1, 2]})
b = pl.DataFrame({"b": ["a", "b"], "c": [1, 2]})
Expand Down
18 changes: 18 additions & 0 deletions py-polars/tests/test_interop.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
import pandas as pd
import pyarrow as pa
import pytest

import polars as pl

Expand All @@ -28,6 +29,10 @@ def test_from_pandas_datetime():
# checks lazy dispatch
pl.DataFrame([s.rename("foo")])[pl.col("foo").dt.round("hour", 2)]

df = pd.DataFrame({"datetime": ["2021-01-01", "2021-01-02"], "foo": [1, 2]})
df["datetime"] = pd.to_datetime(df["datetime"])
pl.from_pandas(df)


def test_arrow_list_roundtrip():
# https://github.com/pola-rs/polars/issues/1064
Expand Down Expand Up @@ -117,3 +122,16 @@ def test_from_pandas_series():
pd_series = pd.Series([1, 2, 3], name="pd")
df = pl.from_pandas(pd_series)
assert df.shape == (3,)


def test_from_pandas_nan_to_none():
    """Check how ``pl.from_pandas`` converts missing values.

    A pandas series holding ``2``, ``np.nan`` and ``None`` is converted twice:
    once with default arguments and once with ``nan_to_none=False``. The two
    missing positions must come back as ``None`` in the first result and as
    NaN in the second. After position 2 is overwritten with ``pd.NA``,
    converting with ``nan_to_none=False`` must raise ``ArrowInvalid``.
    """
    from pyarrow import ArrowInvalid

    pd_series = pd.Series([2, np.nan, None], name="pd")
    out_true = pl.from_pandas(pd_series)
    out_false = pl.from_pandas(pd_series, nan_to_none=False)
    pd_series.loc[2] = pd.NA

    # A non-empty list is always truthy, so asserting on a bare list
    # comprehension can never fail; all() makes the checks effective.
    # Position 0 holds the real value 2, so only positions 1 and 2 are checked.
    assert all(val is None for val in out_true.to_list()[1:])
    assert all(np.isnan(val) for val in out_false.to_list()[1:])

    with pytest.raises(ArrowInvalid, match="Could not convert"):
        pl.from_pandas(pd_series, nan_to_none=False)
18 changes: 0 additions & 18 deletions py-polars/tests/test_lazy.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from datetime import datetime

import numpy as np
import pytest

Expand Down Expand Up @@ -390,15 +388,6 @@ def test_literal_projection():
assert df.select([2.0]).dtypes == [pl.Float64]


def test_to_python_datetime():
    """Check the result dtypes of ``dt.to_python_datetime`` and ``dt.timestamp``."""
    frame = pl.DataFrame({"a": [1, 2, 3]})

    python_objects = frame.select(col("a").cast(pl.Datetime).dt.to_python_datetime())
    assert python_objects["a"].dtype == pl.Object

    timestamps = frame.select(col("a").cast(pl.Datetime).dt.timestamp())
    assert timestamps["a"].dtype == pl.Int64


def test_interpolate():
df = pl.DataFrame({"a": [1, None, 3]})
assert df.select(col("a").interpolate())["a"] == [1, 2, 3]
Expand Down Expand Up @@ -558,13 +547,6 @@ def test_ufunc():
assert out["a"][1] == 0.6931471805599453


def test_datetime_consistency():
    """A datetime read back from a column or from a literal equals the input."""
    moment = datetime(2021, 1, 1)
    frame = pl.DataFrame({"date": [moment]})

    column_value = frame["date"].dt[0]
    assert column_value == moment

    literal_value = frame.select(lit(moment))["literal"].dt[0]
    assert literal_value == moment


def test_clip():
df = pl.DataFrame({"a": [1, 2, 3, 4, 5]})
assert df.select(pl.col("a").clip(2, 4))["a"].to_list() == [2, 2, 3, 4, 4]
Expand Down

0 comments on commit 42e680d

Please sign in to comment.