Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

python datetime consistency #4221

Merged
merged 1 commit into from
Aug 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
48 changes: 32 additions & 16 deletions py-polars/polars/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def sequence_to_pyseries(
strict: bool = True,
) -> PySeries:
"""Construct a PySeries from a sequence."""
dtype_: type | None = None
python_dtype: type | None = None
nested_dtype: PolarsDataType | type | None = None
temporal_unit: str | None = None

Expand All @@ -154,49 +154,65 @@ def sequence_to_pyseries(
# lists defer to subsequent handling; identify nested type
elif dtype == List:
nested_dtype = getattr(dtype, "inner", None)
dtype_ = list
python_dtype = list

# infer temporal type handling
py_temporal_types = {date, datetime, timedelta, time}
pl_temporal_types = {Date, Datetime, Duration, Time}

value = _get_first_non_none(values)
if value is not None:
# This branch handles dtypes that were given as python types
# (e.g. 'datetime.date' / 'datetime.datetime').
# If the values are integers, we take the physical branch;
# if the values are also python types, we take the temporal branch.
if dtype in py_temporal_types and isinstance(value, int):
dtype = py_type_to_dtype(dtype) # construct from integer
elif (
dtype in pl_temporal_types or type(dtype) in pl_temporal_types
) and not isinstance(value, int):
temporal_unit = getattr(dtype, "tu", None)
dtype_ = dtype_to_py_type(dtype) # type: ignore[arg-type]
python_dtype = dtype_to_py_type(dtype) # type: ignore[arg-type]

if (dtype is not None) and is_polars_dtype(dtype) and (dtype_ is None):
# physical branch
# flat data
if (dtype is not None) and is_polars_dtype(dtype) and (python_dtype is None):
constructor = polars_type_to_constructor(dtype)
pyseries = constructor(name, values, strict)

if dtype in (Date, Datetime, Duration, Time, Categorical):
pyseries = pyseries.cast(dtype, True)
return pyseries

else:
if dtype_ is None:
dtype_ = float if (value is None) else type(value)
if python_dtype is None:
python_dtype = float if (value is None) else type(value)

# temporal branch
if python_dtype in py_temporal_types:
if dtype is None:
dtype = py_type_to_dtype(python_dtype) # construct from integer
elif dtype in py_temporal_types:
dtype = py_type_to_dtype(dtype)

# If no temporal unit is given, we construct from anyvalues, so that we have
# one consistent point of entry that determines the units and timezones
# (i.e. we ignore them here). They can be set afterwards.
if dtype == Datetime and temporal_unit is None:
return PySeries.new_from_anyvalues(name, values)

if dtype_ in py_temporal_types:
if not _PYARROW_AVAILABLE: # pragma: no cover
raise ImportError(
"'pyarrow' is required for converting a Sequence of date or"
" datetime values to a PySeries."
)
# let arrow infer dtype if not timedelta
# arrow uses microsecond durations by default, not supported yet.
arrow_dtype = (
dtype_to_arrow_type(dtype)
if (dtype is not None and temporal_unit)
else None
)
arrow_dtype = dtype_to_arrow_type(dtype)
return arrow_to_pyseries(name, pa.array(values, type=arrow_dtype))

elif dtype_ in (list, tuple):
elif python_dtype in (list, tuple):
if nested_dtype is None:
nested_value = _get_first_non_none(value)
nested_dtype = type(nested_value) if nested_value is not None else float
Expand Down Expand Up @@ -255,12 +271,12 @@ def sequence_to_pyseries(
# Convert mixed sequences like `[[12], "foo", 9]`
return PySeries.new_object(name, values, strict)

elif dtype_ == pli.Series:
elif python_dtype == pli.Series:
return PySeries.new_series_list(name, [v.inner() for v in values], strict)
elif dtype_ == PySeries:
elif python_dtype == PySeries:
return PySeries.new_series_list(name, values, strict)
else:
constructor = py_type_to_constructor(dtype_)
constructor = py_type_to_constructor(python_dtype)

if constructor == PySeries.new_object:
try:
Expand Down
13 changes: 7 additions & 6 deletions py-polars/src/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -548,9 +548,9 @@ impl<'s> FromPyObject<'s> for Wrap<AnyValue<'s>> {
kwargs.set_item("is_dst", py.None())?;
let loc_tz = tz.call_method("localize", (dt,), Some(kwargs))?;
loc_tz.call_method0("timestamp")?;
// s to ms
let v = (ts.extract::<f64>()? * 1000.0) as i64;
Ok(AnyValue::Datetime(v, TimeUnit::Milliseconds, &None).into())
// s to us
let v = (ts.extract::<f64>()? * 1000_000.0) as i64;
Ok(AnyValue::Datetime(v, TimeUnit::Microseconds, &None).into())
}
// unix
#[cfg(not(target_arch = "windows"))]
Expand All @@ -561,9 +561,10 @@ impl<'s> FromPyObject<'s> for Wrap<AnyValue<'s>> {
kwargs.set_item("tzinfo", timezone.getattr("utc")?)?;
let dt = ob.call_method("replace", (), Some(kwargs))?;
let ts = dt.call_method0("timestamp")?;
// s to ms
let v = (ts.extract::<f64>()? * 1000.0) as i64;
Ok(AnyValue::Datetime(v, TimeUnit::Milliseconds, &None).into())
// s to us
let v = (ts.extract::<f64>()? * 1_000_000.0) as i64;
// we choose microseconds (us), as that is Python's default datetime resolution
Ok(AnyValue::Datetime(v, TimeUnit::Microseconds, &None).into())
}
} else if ob.is_none() {
Ok(AnyValue::Null.into())
Expand Down
9 changes: 9 additions & 0 deletions py-polars/tests/test_datelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1262,3 +1262,12 @@ def test_weekday() -> None:
assert s.dt.cast_time_unit(tu).dt.weekday()[0] == 0

assert s.cast(pl.Date).dt.weekday()[0] == 0


def test_from_dict_tu_consistency() -> None:
    """Check that `from_dict` and `from_dicts` agree on dtypes.

    The same timezone-aware datetime is fed through the column-oriented and
    the row-oriented constructor; both resulting frames must report identical
    dtypes.
    """
    # NOTE(review): pytz documents `tz.localize(...)` as the preferred way to
    # attach a zone; `tzinfo=` is kept here to preserve the original input.
    aware_dt = datetime(2020, 8, 1, 12, 0, 0, tzinfo=pytz.timezone("PRC"))

    column_oriented = pl.from_dict({"dt": [aware_dt]})
    row_oriented = pl.from_dicts([{"dt": aware_dt}])

    assert column_oriented.dtypes == row_oriented.dtypes