Skip to content

Commit

Permalink
don't cast pandas datetime[ns] to datetime[ms] (#2936)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Mar 20, 2022
1 parent 7c743bf commit ffe5162
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 14 deletions.
11 changes: 1 addition & 10 deletions py-polars/polars/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,16 +232,7 @@ def _pandas_series_to_arrow(
-------
"""
dtype = values.dtype
if dtype == "datetime64[ns]":
# We first cast to ms because that's the unit of Datetime,
# then we cast via int64 to Datetime. Casting directly to Datetime leads to
# loss of time information; see https://github.com/pola-rs/polars/issues/476
arr = pa.array(
np.array(values.values, dtype="datetime64[ms]"), from_pandas=nan_to_none
)
arr = pa.compute.cast(arr, pa.int64())
return pa.compute.cast(arr, pa.timestamp("ms"))
elif dtype == "object" and len(values) > 0:
if dtype == "object" and len(values) > 0:
if isinstance(values.values[0], str):
return pa.array(values, pa.large_utf8(), from_pandas=nan_to_none)

Expand Down
16 changes: 12 additions & 4 deletions py-polars/tests/test_interop.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import datetime
from datetime import datetime

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -59,7 +59,7 @@ def test_from_pandas_nan_to_none() -> None:


def test_from_pandas_datetime() -> None:
ts = datetime.datetime(2021, 1, 1, 20, 20, 20, 20)
ts = datetime(2021, 1, 1, 20, 20, 20, 20)
pl_s = pd.Series([ts, ts])
tmp = pl.from_pandas(pl_s.to_frame("a"))
s = tmp["a"]
Expand All @@ -71,8 +71,8 @@ def test_from_pandas_datetime() -> None:
"2021-06-24 00:00:00", "2021-06-24 10:00:00", freq="1H", closed="left"
)
s = pl.from_pandas(date_times)
assert s[0] == datetime.datetime(2021, 6, 24, 0, 0)
assert s[-1] == datetime.datetime(2021, 6, 24, 9, 0)
assert s[0] == datetime(2021, 6, 24, 0, 0)
assert s[-1] == datetime(2021, 6, 24, 9, 0)

df = pd.DataFrame({"datetime": ["2021-01-01", "2021-01-02"], "foo": [1, 2]})
df["datetime"] = pd.to_datetime(df["datetime"])
Expand Down Expand Up @@ -266,3 +266,11 @@ def test_to_pandas_series() -> None:

def test_respect_dtype_with_series_from_numpy() -> None:
    """Check that an explicit ``dtype`` is kept when building from numpy."""
    # The numpy array is built without a dtype; UInt32 is requested explicitly.
    series = pl.Series("foo", np.array([1, 2, 3]), dtype=pl.UInt32)
    assert series.dtype == pl.UInt32


def test_from_pandas_ns_resolution() -> None:
    """Check conversion of a pandas timestamp that carries a nanosecond part."""
    stamp = pd.Timestamp(year=2021, month=1, day=1, hour=1, second=1, nanosecond=1)
    df = pd.DataFrame([stamp], columns=["date"])

    # The expected value is compared as a plain datetime with no sub-second part.
    expected = datetime(2021, 1, 1, 1, 0, 1)
    assert pl.from_pandas(df)[0, 0] == expected

0 comments on commit ffe5162

Please sign in to comment.