Skip to content

Commit

Permalink
python datetime consistency (#4221)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Aug 2, 2022
1 parent ec4f7e8 commit d1b4d02
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 22 deletions.
48 changes: 32 additions & 16 deletions py-polars/polars/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def sequence_to_pyseries(
strict: bool = True,
) -> PySeries:
"""Construct a PySeries from a sequence."""
dtype_: type | None = None
python_dtype: type | None = None
nested_dtype: PolarsDataType | type | None = None
temporal_unit: str | None = None

Expand All @@ -154,49 +154,65 @@ def sequence_to_pyseries(
# lists defer to subsequent handling; identify nested type
elif dtype == List:
nested_dtype = getattr(dtype, "inner", None)
dtype_ = list
python_dtype = list

# infer temporal type handling
py_temporal_types = {date, datetime, timedelta, time}
pl_temporal_types = {Date, Datetime, Duration, Time}

value = _get_first_non_none(values)
if value is not None:
# This branch handles dtypes that were given as Python types
# (e.g. 'datetime.date' / 'datetime.datetime').
# If the values are integers, we take the physical branch;
# if the values are also Python types, we take the temporal branch.
if dtype in py_temporal_types and isinstance(value, int):
dtype = py_type_to_dtype(dtype) # construct from integer
elif (
dtype in pl_temporal_types or type(dtype) in pl_temporal_types
) and not isinstance(value, int):
temporal_unit = getattr(dtype, "tu", None)
dtype_ = dtype_to_py_type(dtype) # type: ignore[arg-type]
python_dtype = dtype_to_py_type(dtype) # type: ignore[arg-type]

if (dtype is not None) and is_polars_dtype(dtype) and (dtype_ is None):
# physical branch
# flat data
if (dtype is not None) and is_polars_dtype(dtype) and (python_dtype is None):
constructor = polars_type_to_constructor(dtype)
pyseries = constructor(name, values, strict)

if dtype in (Date, Datetime, Duration, Time, Categorical):
pyseries = pyseries.cast(dtype, True)
return pyseries

else:
if dtype_ is None:
dtype_ = float if (value is None) else type(value)
if python_dtype is None:
python_dtype = float if (value is None) else type(value)

# temporal branch
if python_dtype in py_temporal_types:
if dtype is None:
dtype = py_type_to_dtype(python_dtype) # construct from integer
elif dtype in py_temporal_types:
dtype = py_type_to_dtype(dtype)

# If no temporal unit is given, we construct from anyvalues, so that we have one
# consistent point of entry that sets the units and timezones
# (i.e. we ignore them here). They can be set afterwards.
if dtype == Datetime and temporal_unit is None:
return PySeries.new_from_anyvalues(name, values)

if dtype_ in py_temporal_types:
if not _PYARROW_AVAILABLE: # pragma: no cover
raise ImportError(
"'pyarrow' is required for converting a Sequence of date or"
" datetime values to a PySeries."
)
# let arrow infer dtype if not timedelta
# arrow uses microsecond durations by default, not supported yet.
arrow_dtype = (
dtype_to_arrow_type(dtype)
if (dtype is not None and temporal_unit)
else None
)
arrow_dtype = dtype_to_arrow_type(dtype)
return arrow_to_pyseries(name, pa.array(values, type=arrow_dtype))

elif dtype_ in (list, tuple):
elif python_dtype in (list, tuple):
if nested_dtype is None:
nested_value = _get_first_non_none(value)
nested_dtype = type(nested_value) if nested_value is not None else float
Expand Down Expand Up @@ -255,12 +271,12 @@ def sequence_to_pyseries(
# Convert mixed sequences like `[[12], "foo", 9]`
return PySeries.new_object(name, values, strict)

elif dtype_ == pli.Series:
elif python_dtype == pli.Series:
return PySeries.new_series_list(name, [v.inner() for v in values], strict)
elif dtype_ == PySeries:
elif python_dtype == PySeries:
return PySeries.new_series_list(name, values, strict)
else:
constructor = py_type_to_constructor(dtype_)
constructor = py_type_to_constructor(python_dtype)

if constructor == PySeries.new_object:
try:
Expand Down
13 changes: 7 additions & 6 deletions py-polars/src/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -548,9 +548,9 @@ impl<'s> FromPyObject<'s> for Wrap<AnyValue<'s>> {
kwargs.set_item("is_dst", py.None())?;
let loc_tz = tz.call_method("localize", (dt,), Some(kwargs))?;
loc_tz.call_method0("timestamp")?;
// s to ms
let v = (ts.extract::<f64>()? * 1000.0) as i64;
Ok(AnyValue::Datetime(v, TimeUnit::Milliseconds, &None).into())
// s to us
let v = (ts.extract::<f64>()? * 1000_000.0) as i64;
Ok(AnyValue::Datetime(v, TimeUnit::Microseconds, &None).into())
}
// unix
#[cfg(not(target_arch = "windows"))]
Expand All @@ -561,9 +561,10 @@ impl<'s> FromPyObject<'s> for Wrap<AnyValue<'s>> {
kwargs.set_item("tzinfo", timezone.getattr("utc")?)?;
let dt = ob.call_method("replace", (), Some(kwargs))?;
let ts = dt.call_method0("timestamp")?;
// s to ms
let v = (ts.extract::<f64>()? * 1000.0) as i64;
Ok(AnyValue::Datetime(v, TimeUnit::Milliseconds, &None).into())
// s to us
let v = (ts.extract::<f64>()? * 1_000_000.0) as i64;
// we choose microseconds (us) as that is Python's default unit
Ok(AnyValue::Datetime(v, TimeUnit::Microseconds, &None).into())
}
} else if ob.is_none() {
Ok(AnyValue::Null.into())
Expand Down
9 changes: 9 additions & 0 deletions py-polars/tests/test_datelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1262,3 +1262,12 @@ def test_weekday() -> None:
assert s.dt.cast_time_unit(tu).dt.weekday()[0] == 0

assert s.cast(pl.Date).dt.weekday()[0] == 0


def test_from_dict_tu_consistency() -> None:
    """Check that `from_dict` and `from_dicts` yield identical dtypes.

    The same timezone-aware datetime is fed through the column-oriented
    and the row-oriented constructor; the resulting frames must report
    the same dtypes.
    """
    aware_dt = datetime(2020, 8, 1, 12, 0, 0, tzinfo=pytz.timezone("PRC"))

    # column-oriented construction
    columnar = pl.from_dict({"dt": [aware_dt]})
    # row-oriented construction
    row_wise = pl.from_dicts([{"dt": aware_dt}])

    assert columnar.dtypes == row_wise.dtypes

0 comments on commit d1b4d02

Please sign in to comment.