From 066ec408ac6f4a01fcf528bda36aa0a280eca378 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Wed, 28 Feb 2024 13:18:16 +0100 Subject: [PATCH] refactor(python): Refactor `utils/convert.py` module (#14739) --- .../polars-plan/src/logical_plan/pyarrow.rs | 20 +- py-polars/polars/dataframe/frame.py | 6 +- py-polars/polars/dataframe/group_by.py | 12 +- py-polars/polars/datatypes/convert.py | 15 +- py-polars/polars/expr/datetime.py | 10 +- py-polars/polars/expr/expr.py | 10 +- py-polars/polars/functions/lit.py | 16 +- py-polars/polars/functions/range/_utils.py | 4 +- py-polars/polars/io/iceberg.py | 6 +- .../io/pyarrow_dataset/anonymous_scan.py | 16 +- py-polars/polars/lazyframe/frame.py | 18 +- py-polars/polars/series/datetime.py | 10 +- py-polars/polars/series/series.py | 16 +- py-polars/polars/utils/__init__.py | 32 +- py-polars/polars/utils/convert.py | 302 +++++++++--------- py-polars/src/conversion/any_value.rs | 16 +- py-polars/src/conversion/chunked_array.rs | 18 +- .../tests/parametric/test_groupby_rolling.py | 8 +- .../unit/constructors/test_constructors.py | 14 +- py-polars/tests/unit/dataframe/test_df.py | 9 +- .../tests/unit/datatypes/test_temporal.py | 150 ++++----- py-polars/tests/unit/expr/test_exprs.py | 15 +- .../functions/as_datatype/test_as_datatype.py | 2 +- .../functions/range/test_datetime_range.py | 16 +- .../tests/unit/namespaces/test_datetime.py | 21 +- .../tests/unit/namespaces/test_strptime.py | 6 +- .../unit/operations/test_group_by_dynamic.py | 10 +- py-polars/tests/unit/series/test_series.py | 2 +- py-polars/tests/unit/utils/test_utils.py | 102 +++--- 29 files changed, 430 insertions(+), 452 deletions(-) diff --git a/crates/polars-plan/src/logical_plan/pyarrow.rs b/crates/polars-plan/src/logical_plan/pyarrow.rs index 91ba9aca3ea9..82d83541de63 100644 --- a/crates/polars-plan/src/logical_plan/pyarrow.rs +++ b/crates/polars-plan/src/logical_plan/pyarrow.rs @@ -13,11 +13,11 @@ pub(super) struct Args { } fn to_py_datetime(v: i64, tu: &TimeUnit, tz: Option<&TimeZone>) -> String { - // note: `_to_python_datetime` and the `Datetime` + // note: `to_py_datetime` and the `Datetime` // dtype have to be in-scope on the python side match tz { - None => format!("_to_python_datetime({},'{}')", v, tu.to_ascii()), - Some(tz) => format!("_to_python_datetime({},'{}',{})", v, tu.to_ascii(), tz), + None => format!("to_py_datetime({},'{}')", v, tu.to_ascii()), + Some(tz) => format!("to_py_datetime({},'{}',{})", v, tu.to_ascii(), tz), } } @@ -53,7 +53,7 @@ pub(super) fn predicate_to_pa( let dtm = to_py_datetime(v, &tu, tz.as_ref()); write!(list_repr, "{dtm},").unwrap(); } else if let AnyValue::Date(v) = av { - write!(list_repr, "_to_python_date({v}),").unwrap(); + write!(list_repr, "to_py_date({v}),").unwrap(); } else { write!(list_repr, "{av},").unwrap(); } @@ -79,25 +79,25 @@ pub(super) fn predicate_to_pa( }, #[cfg(feature = "dtype-date")] AnyValue::Date(v) => { - // the function `_to_python_date` and the `Date` + // the function `to_py_date` and the `Date` // dtype have to be in scope on the python side - Some(format!("_to_python_date({v})")) + Some(format!("to_py_date({v})")) }, #[cfg(feature = "dtype-datetime")] AnyValue::Datetime(v, tu, tz) => Some(to_py_datetime(v, &tu, tz.as_ref())), // Activate once pyarrow supports them // #[cfg(feature = "dtype-time")] // AnyValue::Time(v) => { - // // the function `_to_python_time` has to be in scope + // // the function `to_py_time` has to be in scope // // on the python side - // Some(format!("_to_python_time(value={v})")) + // 
Some(format!("to_py_time(value={v})")) // } // #[cfg(feature = "dtype-duration")] // AnyValue::Duration(v, tu) => { - // // the function `_to_python_timedelta` has to be in scope + // // the function `to_py_timedelta` has to be in scope // // on the python side // Some(format!( - // "_to_python_timedelta(value={}, tu='{}')", + // "to_py_timedelta(value={}, tu='{}')", // v, // tu.to_ascii() // )) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 235b1b47f214..b4728041aae2 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -89,7 +89,7 @@ ) from polars.utils._parse_expr_input import parse_as_expression from polars.utils._wrap import wrap_expr, wrap_ldf, wrap_s -from polars.utils.convert import _timedelta_to_pl_duration +from polars.utils.convert import parse_as_duration_string from polars.utils.deprecation import ( deprecate_function, deprecate_nonkeyword_arguments, @@ -6061,8 +6061,8 @@ def upsample( if offset is None: offset = "0ns" - every = _timedelta_to_pl_duration(every) - offset = _timedelta_to_pl_duration(offset) + every = parse_as_duration_string(every) + offset = parse_as_duration_string(offset) return self._from_pydf( self._df.upsample(by, time_column, every, offset, maintain_order) diff --git a/py-polars/polars/dataframe/group_by.py b/py-polars/polars/dataframe/group_by.py index fd89b8256bd1..f6ad0144098b 100644 --- a/py-polars/polars/dataframe/group_by.py +++ b/py-polars/polars/dataframe/group_by.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, Callable, Iterable, Iterator from polars import functions as F -from polars.utils.convert import _timedelta_to_pl_duration +from polars.utils.convert import parse_as_duration_string from polars.utils.deprecation import ( deprecate_renamed_function, issue_deprecation_warning, @@ -792,8 +792,8 @@ def __init__( by: IntoExpr | Iterable[IntoExpr] | None, check_sorted: bool, ): - period = _timedelta_to_pl_duration(period) - offset = _timedelta_to_pl_duration(offset) + period = parse_as_duration_string(period) + offset = parse_as_duration_string(offset) self.df = df self.time_column = index_column @@ -969,9 +969,9 @@ def __init__( start_by: StartBy, check_sorted: bool, ): - every = _timedelta_to_pl_duration(every) - period = _timedelta_to_pl_duration(period) - offset = _timedelta_to_pl_duration(offset) + every = parse_as_duration_string(every) + period = parse_as_duration_string(period) + offset = parse_as_duration_string(offset) self.df = df self.time_column = index_column diff --git a/py-polars/polars/datatypes/convert.py b/py-polars/polars/datatypes/convert.py index de84dcaaedd1..c7da2453d064 100644 --- a/py-polars/polars/datatypes/convert.py +++ b/py-polars/polars/datatypes/convert.py @@ -66,7 +66,7 @@ if TYPE_CHECKING: from typing import Literal - from polars.type_aliases import PolarsDataType, PythonDataType, SchemaDict + from polars.type_aliases import PolarsDataType, PythonDataType, SchemaDict, TimeUnit PY_STR_TO_DTYPE: SchemaDict = { @@ -486,16 +486,17 @@ def maybe_cast(el: Any, dtype: PolarsDataType) -> Any: """Try casting a value to a value that is valid for the given Polars dtype.""" # cast el if it doesn't match from polars.utils.convert import ( - _datetime_to_pl_timestamp, - _timedelta_to_pl_timedelta, + datetime_to_int, + timedelta_to_int, ) + time_unit: TimeUnit if isinstance(el, datetime): - time_unit = getattr(dtype, "time_unit", None) - return _datetime_to_pl_timestamp(el, time_unit) + time_unit = getattr(dtype, "time_unit", "us") + 
return datetime_to_int(el, time_unit) elif isinstance(el, timedelta): - time_unit = getattr(dtype, "time_unit", None) - return _timedelta_to_pl_timedelta(el, time_unit) + time_unit = getattr(dtype, "time_unit", "us") + return timedelta_to_int(el, time_unit) py_type = dtype_to_py_type(dtype) if not isinstance(el, py_type): diff --git a/py-polars/polars/expr/datetime.py b/py-polars/polars/expr/datetime.py index 5debd23daa5b..823f56604644 100644 --- a/py-polars/polars/expr/datetime.py +++ b/py-polars/polars/expr/datetime.py @@ -8,7 +8,7 @@ from polars.datatypes import DTYPE_TEMPORAL_UNITS, Date, Int32 from polars.utils._parse_expr_input import parse_as_expression from polars.utils._wrap import wrap_expr -from polars.utils.convert import _timedelta_to_pl_duration +from polars.utils.convert import parse_as_duration_string from polars.utils.deprecation import ( deprecate_function, deprecate_renamed_function, @@ -183,7 +183,7 @@ def truncate( every = deprecate_saturating(every) offset = deprecate_saturating(offset) if not isinstance(every, pl.Expr): - every = _timedelta_to_pl_duration(every) + every = parse_as_duration_string(every) if use_earliest is not None: issue_deprecation_warning( @@ -203,7 +203,7 @@ def truncate( return wrap_expr( self._pyexpr.dt_truncate( every, - _timedelta_to_pl_duration(offset), + parse_as_duration_string(offset), ) ) @@ -340,8 +340,8 @@ def round( return wrap_expr( self._pyexpr.dt_round( - _timedelta_to_pl_duration(every), - _timedelta_to_pl_duration(offset), + parse_as_duration_string(every), + parse_as_duration_string(offset), ) ) diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index dcfa733d378c..699de0973a4b 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -48,7 +48,7 @@ parse_as_list_of_expressions, parse_predicates_constraints_as_expression, ) -from polars.utils.convert import _negate_duration, _timedelta_to_pl_duration +from polars.utils.convert import negate_duration_string, parse_as_duration_string from polars.utils.deprecation import ( deprecate_function, deprecate_nonkeyword_arguments, @@ -3420,10 +3420,10 @@ def rolling( period = deprecate_saturating(period) offset = deprecate_saturating(offset) if offset is None: - offset = _negate_duration(_timedelta_to_pl_duration(period)) + offset = negate_duration_string(parse_as_duration_string(period)) - period = _timedelta_to_pl_duration(period) - offset = _timedelta_to_pl_duration(offset) + period = parse_as_duration_string(period) + offset = parse_as_duration_string(offset) return self._from_pyexpr( self._pyexpr.rolling(index_column, period, offset, closed, check_sorted) @@ -10086,7 +10086,7 @@ def _prepare_rolling_window_args( min_periods = window_size window_size = f"{window_size}i" elif isinstance(window_size, timedelta): - window_size = _timedelta_to_pl_duration(window_size) + window_size = parse_as_duration_string(window_size) if min_periods is None: min_periods = 1 return window_size, min_periods diff --git a/py-polars/polars/functions/lit.py b/py-polars/polars/functions/lit.py index fc03ee0fa005..b02aa57e6ce1 100644 --- a/py-polars/polars/functions/lit.py +++ b/py-polars/polars/functions/lit.py @@ -10,10 +10,10 @@ from polars.dependencies import numpy as np from polars.utils._wrap import wrap_expr from polars.utils.convert import ( - _date_to_pl_date, - _datetime_to_pl_timestamp, - _time_to_pl_time, - _timedelta_to_pl_timedelta, + date_to_int, + datetime_to_int, + time_to_int, + timedelta_to_int, ) with contextlib.suppress(ImportError): # Module 
not available when building docs @@ -91,7 +91,7 @@ def lit( time_zone = tzinfo_str dt_utc = value.replace(tzinfo=timezone.utc) - dt_int = _datetime_to_pl_timestamp(dt_utc, time_unit) + dt_int = datetime_to_int(dt_utc, time_unit) expr = lit(dt_int).cast(Datetime(time_unit)) if time_zone is not None: expr = expr.dt.replace_time_zone( @@ -105,15 +105,15 @@ def lit( else: time_unit = "us" - td_int = _timedelta_to_pl_timedelta(value, time_unit) + td_int = timedelta_to_int(value, time_unit) return lit(td_int).cast(Duration(time_unit)) elif isinstance(value, time): - time_int = _time_to_pl_time(value) + time_int = time_to_int(value) return lit(time_int).cast(Time) elif isinstance(value, date): - date_int = _date_to_pl_date(value) + date_int = date_to_int(value) return lit(date_int).cast(Date) elif isinstance(value, pl.Series): diff --git a/py-polars/polars/functions/range/_utils.py b/py-polars/polars/functions/range/_utils.py index da1b38dbbd1c..173125996ef7 100644 --- a/py-polars/polars/functions/range/_utils.py +++ b/py-polars/polars/functions/range/_utils.py @@ -2,13 +2,13 @@ from datetime import timedelta -from polars.utils.convert import _timedelta_to_pl_duration +from polars.utils.convert import parse_as_duration_string def parse_interval_argument(interval: str | timedelta) -> str: """Parse the interval argument as a Polars duration string.""" if isinstance(interval, timedelta): - return _timedelta_to_pl_duration(interval) + return parse_as_duration_string(interval) if " " in interval: interval = interval.replace(" ", "") diff --git a/py-polars/polars/io/iceberg.py b/py-polars/polars/io/iceberg.py index 558604150f0c..fd5d3f6a4aa1 100644 --- a/py-polars/polars/io/iceberg.py +++ b/py-polars/polars/io/iceberg.py @@ -22,7 +22,7 @@ import polars._reexport as pl from polars.dependencies import pyiceberg -from polars.utils.convert import _to_python_date, _to_python_datetime +from polars.utils.convert import to_py_date, to_py_datetime if TYPE_CHECKING: from datetime import date, datetime @@ -34,8 +34,8 @@ __all__ = ["scan_iceberg"] _temporal_conversions: dict[str, Callable[..., datetime | date]] = { - "_to_python_date": _to_python_date, - "_to_python_datetime": _to_python_datetime, + "to_py_date": to_py_date, + "to_py_datetime": to_py_datetime, } diff --git a/py-polars/polars/io/pyarrow_dataset/anonymous_scan.py b/py-polars/polars/io/pyarrow_dataset/anonymous_scan.py index 2bae55a8be23..df639d478a96 100644 --- a/py-polars/polars/io/pyarrow_dataset/anonymous_scan.py +++ b/py-polars/polars/io/pyarrow_dataset/anonymous_scan.py @@ -73,10 +73,10 @@ def _scan_pyarrow_dataset_impl( if predicate: from polars.datatypes import Date, Datetime, Duration from polars.utils.convert import ( - _to_python_date, - _to_python_datetime, - _to_python_time, - _to_python_timedelta, + to_py_date, + to_py_datetime, + to_py_time, + to_py_timedelta, ) _filter = eval( @@ -86,10 +86,10 @@ def _scan_pyarrow_dataset_impl( "Date": Date, "Datetime": Datetime, "Duration": Duration, - "_to_python_date": _to_python_date, - "_to_python_datetime": _to_python_datetime, - "_to_python_time": _to_python_time, - "_to_python_timedelta": _to_python_timedelta, + "to_py_date": to_py_date, + "to_py_datetime": to_py_datetime, + "to_py_time": to_py_time, + "to_py_timedelta": to_py_timedelta, }, ) diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 2be1a35a75b3..0d1d8ed8c3e4 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -68,7 +68,7 @@ 
parse_as_list_of_expressions, ) from polars.utils._wrap import wrap_df, wrap_expr -from polars.utils.convert import _negate_duration, _timedelta_to_pl_duration +from polars.utils.convert import negate_duration_string, parse_as_duration_string from polars.utils.deprecation import ( deprecate_function, deprecate_parameter_as_positional, @@ -3357,11 +3357,11 @@ def rolling( offset = deprecate_saturating(offset) index_column = parse_as_expression(index_column) if offset is None: - offset = _negate_duration(_timedelta_to_pl_duration(period)) + offset = negate_duration_string(parse_as_duration_string(period)) pyexprs_by = parse_as_list_of_expressions(by) if by is not None else [] - period = _timedelta_to_pl_duration(period) - offset = _timedelta_to_pl_duration(offset) + period = parse_as_duration_string(period) + offset = parse_as_duration_string(offset) lgb = self._ldf.rolling( index_column, period, offset, closed, pyexprs_by, check_sorted @@ -3703,14 +3703,14 @@ def group_by_dynamic( index_column = parse_as_expression(index_column) if offset is None: - offset = _negate_duration(_timedelta_to_pl_duration(every)) + offset = negate_duration_string(parse_as_duration_string(every)) if period is None: period = every - period = _timedelta_to_pl_duration(period) - offset = _timedelta_to_pl_duration(offset) - every = _timedelta_to_pl_duration(every) + period = parse_as_duration_string(period) + offset = parse_as_duration_string(offset) + every = parse_as_duration_string(every) pyexprs_by = parse_as_list_of_expressions(by) if by is not None else [] lgb = self._ldf.group_by_dynamic( @@ -3887,7 +3887,7 @@ def join_asof( if isinstance(tolerance, str): tolerance_str = tolerance elif isinstance(tolerance, timedelta): - tolerance_str = _timedelta_to_pl_duration(tolerance) + tolerance_str = parse_as_duration_string(tolerance) else: tolerance_num = tolerance diff --git a/py-polars/polars/series/datetime.py b/py-polars/polars/series/datetime.py index 8980f53426d3..f4b60d80dc1c 100644 --- a/py-polars/polars/series/datetime.py +++ b/py-polars/polars/series/datetime.py @@ -5,7 +5,7 @@ from polars.datatypes import Date, Datetime, Duration from polars.series.utils import expr_dispatch from polars.utils._wrap import wrap_s -from polars.utils.convert import _to_python_date, _to_python_datetime +from polars.utils.convert import to_py_date, to_py_datetime from polars.utils.deprecation import deprecate_function, deprecate_renamed_function from polars.utils.unstable import unstable @@ -81,11 +81,11 @@ def median(self) -> TemporalLiteral | float | None: out = s.median() if out is not None: if s.dtype == Date: - return _to_python_date(int(out)) # type: ignore[arg-type] + return to_py_date(int(out)) # type: ignore[arg-type] elif s.dtype in (Datetime, Duration): return out # type: ignore[return-value] else: - return _to_python_datetime(int(out), s.dtype.time_unit) # type: ignore[arg-type, attr-defined] + return to_py_datetime(int(out), s.dtype.time_unit) # type: ignore[arg-type, attr-defined] return None def mean(self) -> TemporalLiteral | float | None: @@ -105,11 +105,11 @@ def mean(self) -> TemporalLiteral | float | None: out = s.mean() if out is not None: if s.dtype == Date: - return _to_python_date(int(out)) # type: ignore[arg-type] + return to_py_date(int(out)) # type: ignore[arg-type] elif s.dtype in (Datetime, Duration): return out # type: ignore[return-value] else: - return _to_python_datetime(int(out), s.dtype.time_unit) # type: ignore[arg-type, attr-defined] + return to_py_datetime(int(out), s.dtype.time_unit) # 
type: ignore[arg-type, attr-defined] return None def to_string(self, format: str) -> Series: diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index d4f2e3feb27a..e2500eb91c87 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -86,10 +86,10 @@ ) from polars.utils._wrap import wrap_df from polars.utils.convert import ( - _date_to_pl_date, - _datetime_to_pl_timestamp, - _time_to_pl_time, - _timedelta_to_pl_timedelta, + date_to_int, + datetime_to_int, + time_to_int, + timedelta_to_int, ) from polars.utils.deprecation import ( deprecate_function, @@ -696,20 +696,20 @@ def _comp(self, other: Any, op: ComparisonOperator) -> Series: else: msg = f"cannot compare datetime.datetime to Series of type {self.dtype}" raise ValueError(msg) - ts = _datetime_to_pl_timestamp(other, time_unit) # type: ignore[arg-type] + ts = datetime_to_int(other, time_unit) # type: ignore[arg-type] f = get_ffi_func(op + "_<>", Int64, self._s) assert f is not None return self._from_pyseries(f(ts)) elif isinstance(other, time) and self.dtype == Time: - d = _time_to_pl_time(other) + d = time_to_int(other) f = get_ffi_func(op + "_<>", Int64, self._s) assert f is not None return self._from_pyseries(f(d)) elif isinstance(other, timedelta) and self.dtype == Duration: time_unit = self.dtype.time_unit # type: ignore[attr-defined] - td = _timedelta_to_pl_timedelta(other, time_unit) # type: ignore[arg-type] + td = timedelta_to_int(other, time_unit) # type: ignore[arg-type] f = get_ffi_func(op + "_<>", Int64, self._s) assert f is not None return self._from_pyseries(f(td)) @@ -718,7 +718,7 @@ def _comp(self, other: Any, op: ComparisonOperator) -> Series: other = Series([other]) elif isinstance(other, date) and self.dtype == Date: - d = _date_to_pl_date(other) + d = date_to_int(other) f = get_ffi_func(op + "_<>", Int32, self._s) assert f is not None return self._from_pyseries(f(d)) diff --git a/py-polars/polars/utils/__init__.py b/py-polars/polars/utils/__init__.py index 133bca13981b..324fbd9e1283 100644 --- a/py-polars/polars/utils/__init__.py +++ b/py-polars/polars/utils/__init__.py @@ -5,16 +5,16 @@ """ from polars.utils._scan import _execute_from_rust from polars.utils.convert import ( - _date_to_pl_date, _datetime_for_any_value, _datetime_for_any_value_windows, - _time_to_pl_time, - _timedelta_to_pl_timedelta, - _to_python_date, - _to_python_datetime, - _to_python_decimal, - _to_python_time, - _to_python_timedelta, + date_to_int, + time_to_int, + timedelta_to_int, + to_py_date, + to_py_datetime, + to_py_decimal, + to_py_time, + to_py_timedelta, ) from polars.utils.various import NoDefault, _polars_warn, is_column, no_default @@ -23,16 +23,16 @@ "is_column", "no_default", # Required for Rust bindings - "_date_to_pl_date", + "date_to_int", + "time_to_int", + "timedelta_to_int", "_datetime_for_any_value", "_datetime_for_any_value_windows", "_execute_from_rust", "_polars_warn", - "_time_to_pl_time", - "_timedelta_to_pl_timedelta", - "_to_python_date", - "_to_python_datetime", - "_to_python_decimal", - "_to_python_time", - "_to_python_timedelta", + "to_py_date", + "to_py_datetime", + "to_py_decimal", + "to_py_time", + "to_py_timedelta", ] diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index 7008f82235ff..8359ea83e1a1 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -1,49 +1,26 @@ from __future__ import annotations -import sys from datetime import date, datetime, time, timedelta, timezone 
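# A minimal sketch (assuming a build with this refactor applied) of the integer
# encodings used by the renamed conversion helpers, several of which are
# re-exported above for the Rust bindings and all of which are defined in the
# rewritten `convert.py` below: Date maps to days since the UNIX epoch, Time to
# nanoseconds since midnight, and Datetime/Duration to an integer scaled by the
# given `time_unit`. The date and datetime example values mirror unit tests
# touched in this diff; the time and timedelta values are derived from the
# implementations shown below.
from datetime import date, datetime, time, timedelta

from polars.utils.convert import (
    date_to_int,
    datetime_to_int,
    time_to_int,
    timedelta_to_int,
    to_py_date,
    to_py_datetime,
    to_py_time,
)

assert date_to_int(date(1999, 9, 9)) == 10_843
assert to_py_date(10_843) == date(1999, 9, 9)
assert time_to_int(time(0, 0, 1)) == 1_000_000_000
assert to_py_time(1_000_000_000) == time(0, 0, 1)
assert datetime_to_int(datetime(2121, 1, 1), "ms") == 4_765_132_800_000
assert to_py_datetime(4_765_132_800_000, "ms") == datetime(2121, 1, 1)
assert timedelta_to_int(timedelta(days=1), "us") == 86_400_000_000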
from decimal import Context from functools import lru_cache -from typing import TYPE_CHECKING, Any, Callable, Sequence, TypeVar, overload +from typing import ( + TYPE_CHECKING, + Any, + Callable, + NoReturn, + Sequence, + no_type_check, + overload, +) from polars.dependencies import _ZONEINFO_AVAILABLE, zoneinfo if TYPE_CHECKING: - from collections.abc import Reversible from datetime import tzinfo from decimal import Decimal from polars.type_aliases import TimeUnit - if sys.version_info >= (3, 10): - from typing import ParamSpec - else: - from typing_extensions import ParamSpec - - P = ParamSpec("P") - T = TypeVar("T") - - # the below shenanigans with ZoneInfo are all to handle a - # typing issue in py < 3.9 while preserving lazy-loading - if sys.version_info >= (3, 9): - from zoneinfo import ZoneInfo - elif _ZONEINFO_AVAILABLE: - from backports.zoneinfo._zoneinfo import ZoneInfo - - def get_zoneinfo(key: str) -> ZoneInfo: # noqa: D103 - pass - -else: - - @lru_cache(None) - def get_zoneinfo(key: str) -> ZoneInfo: # noqa: D103 - return zoneinfo.ZoneInfo(key) - - -# note: reversed views don't match as instances of MappingView -if sys.version_info >= (3, 11): - _views: list[Reversible[Any]] = [{}.keys(), {}.values(), {}.items()] - _reverse_mapping_views = tuple(type(reversed(view)) for view in _views) SECONDS_PER_DAY = 86_400 SECONDS_PER_HOUR = 3_600 @@ -56,185 +33,171 @@ def get_zoneinfo(key: str) -> ZoneInfo: # noqa: D103 EPOCH_UTC = datetime(1970, 1, 1, tzinfo=timezone.utc) -def _timestamp_in_seconds(dt: datetime) -> int: - du = dt - EPOCH_UTC - return du.days * SECONDS_PER_DAY + du.seconds - - @overload -def _timedelta_to_pl_duration(td: None) -> None: +def parse_as_duration_string(td: None) -> None: ... @overload -def _timedelta_to_pl_duration(td: timedelta | str) -> str: +def parse_as_duration_string(td: timedelta | str) -> str: ... 
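# A minimal sketch (assuming a build with this refactor applied) of the
# duration-string helpers declared above: `parse_as_duration_string` passes
# strings and None through unchanged and formats `timedelta` values via
# `_timedelta_to_duration_string`, which normalizes negative components;
# `negate_duration_string` simply flips the leading sign. The example values
# mirror `test_parse_as_duration_string` at the end of this diff.
from datetime import timedelta

from polars.utils.convert import negate_duration_string, parse_as_duration_string

assert parse_as_duration_string(None) is None
assert parse_as_duration_string("1d2s") == "1d2s"
assert parse_as_duration_string(timedelta(days=1, seconds=1)) == "1d1s"
assert parse_as_duration_string(timedelta(minutes=-1, seconds=1)) == "-59s"
assert parse_as_duration_string(timedelta(days=-1, microseconds=-1)) == "-1d1us"
assert negate_duration_string("1d2s") == "-1d2s"
assert negate_duration_string("-1d2s") == "1d2s"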
-def _timedelta_to_pl_duration(td: timedelta | str | None) -> str | None: - """Convert python timedelta to a polars duration string.""" +def parse_as_duration_string(td: timedelta | str | None) -> str | None: + """Parse duration input as a Polars duration string.""" if td is None or isinstance(td, str): return td + return _timedelta_to_duration_string(td) + +def _timedelta_to_duration_string(td: timedelta) -> str: + """Convert a Python timedelta object to a Polars duration string.""" + # Positive duration if td.days >= 0: - d = td.days and f"{td.days}d" or "" - s = td.seconds and f"{td.seconds}s" or "" - us = td.microseconds and f"{td.microseconds}us" or "" + d = f"{td.days}d" if td.days != 0 else "" + s = f"{td.seconds}s" if td.seconds != 0 else "" + us = f"{td.microseconds}us" if td.microseconds != 0 else "" + # Negative, whole days + elif td.seconds == 0 and td.microseconds == 0: + return f"{td.days}d" + # Negative, other else: - if not td.seconds and not td.microseconds: - d = td.days and f"{td.days}d" or "" - s = "" - us = "" - else: - corrected_d = td.days + 1 - d = corrected_d and f"{corrected_d}d" or "-" - corrected_seconds = SECONDS_PER_DAY - (td.seconds + (td.microseconds > 0)) - s = corrected_seconds and f"{corrected_seconds}s" or "" - us = td.microseconds and f"{10**6 - td.microseconds}us" or "" + corrected_d = td.days + 1 + corrected_seconds = SECONDS_PER_DAY - (td.seconds + (td.microseconds > 0)) + d = f"{corrected_d}d" if corrected_d != 0 else "-" + s = f"{corrected_seconds}s" if corrected_seconds != 0 else "" + us = f"{10**6 - td.microseconds}us" if td.microseconds != 0 else "" return f"{d}{s}{us}" -def _negate_duration(duration: str) -> str: +def negate_duration_string(duration: str) -> str: + """Negate a Polars duration string.""" if duration.startswith("-"): return duration[1:] - return f"-{duration}" + else: + return f"-{duration}" + + +def date_to_int(d: date) -> int: + """Convert a Python time object to an integer.""" + return (d - EPOCH_DATE).days -def _time_to_pl_time(t: time) -> int: +def time_to_int(t: time) -> int: + """Convert a Python time object to an integer.""" t = t.replace(tzinfo=timezone.utc) seconds = t.hour * SECONDS_PER_HOUR + t.minute * 60 + t.second microseconds = t.microsecond return seconds * NS_PER_SECOND + microseconds * 1_000 -def _date_to_pl_date(d: date) -> int: - return (d - EPOCH_DATE).days - - -def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit | None) -> int: - """Convert a python datetime to a timestamp in given time unit.""" +def datetime_to_int(dt: datetime, time_unit: TimeUnit) -> int: + """Convert a Python datetime object to an integer.""" + # Make sure to use UTC rather than system time zone if dt.tzinfo is None: - # Make sure to use UTC rather than system time zone dt = dt.replace(tzinfo=timezone.utc) - microseconds = dt.microsecond + seconds = _timestamp_in_seconds(dt) - if time_unit == "ns": - return seconds * NS_PER_SECOND + microseconds * 1_000 - elif time_unit == "us" or time_unit is None: + microseconds = dt.microsecond + + if time_unit == "us": return seconds * US_PER_SECOND + microseconds + elif time_unit == "ns": + return seconds * NS_PER_SECOND + microseconds * 1_000 elif time_unit == "ms": return seconds * MS_PER_SECOND + microseconds // 1_000 - msg = f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}" - raise ValueError(msg) + else: + _raise_invalid_time_unit(time_unit) -def _timedelta_to_pl_timedelta(td: timedelta, time_unit: TimeUnit | None) -> int: - """Convert a Python timedelta object 
to a total number of subseconds.""" - microseconds = td.microseconds +def _timestamp_in_seconds(dt: datetime) -> int: + td = dt - EPOCH_UTC + return td.days * SECONDS_PER_DAY + td.seconds + + +def timedelta_to_int(td: timedelta, time_unit: TimeUnit) -> int: + """Convert a Python timedelta object to an integer.""" seconds = td.days * SECONDS_PER_DAY + td.seconds - if time_unit == "ns": - return seconds * NS_PER_SECOND + microseconds * 1_000 - elif time_unit == "us" or time_unit is None: + microseconds = td.microseconds + + if time_unit == "us": return seconds * US_PER_SECOND + microseconds + elif time_unit == "ns": + return seconds * NS_PER_SECOND + microseconds * 1_000 elif time_unit == "ms": return seconds * MS_PER_SECOND + microseconds // 1_000 - - -def _to_python_time(value: int) -> time: - """Convert polars int64 (ns) timestamp to python time object.""" - if value == 0: - return time(microsecond=0) else: - seconds, nanoseconds = divmod(value, NS_PER_SECOND) - minutes, seconds = divmod(seconds, 60) - hours, minutes = divmod(minutes, 60) - return time( - hour=hours, minute=minutes, second=seconds, microsecond=nanoseconds // 1_000 - ) + _raise_invalid_time_unit(time_unit) -def _to_python_timedelta( - value: int | float, time_unit: TimeUnit | None = "ns" -) -> timedelta: - if time_unit == "ns": - return timedelta(microseconds=value // 1_000) - elif time_unit == "us": - return timedelta(microseconds=value) - elif time_unit == "ms": - return timedelta(milliseconds=value) - else: - msg = f"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {time_unit!r}" - raise ValueError(msg) +@lru_cache(256) +def to_py_date(value: int | float) -> date: + """Convert an integer or float to a Python date object.""" + return EPOCH_DATE + timedelta(days=value) -@lru_cache(256) -def _to_python_date(value: int | float) -> date: - """Convert polars int64 timestamp to Python date.""" - return (EPOCH_UTC + timedelta(seconds=value * SECONDS_PER_DAY)).date() +def to_py_time(value: int) -> time: + """Convert an integer to a Python time object.""" + # Fast path for 00:00 + if value == 0: + return time() + seconds, nanoseconds = divmod(value, NS_PER_SECOND) + minutes, seconds = divmod(seconds, 60) + hours, minutes = divmod(minutes, 60) + return time( + hour=hours, minute=minutes, second=seconds, microsecond=nanoseconds // 1_000 + ) -def _to_python_datetime( + +def to_py_datetime( value: int | float, - time_unit: TimeUnit | None = "ns", + time_unit: TimeUnit, time_zone: str | None = None, ) -> datetime: - """Convert polars int64 timestamp to Python datetime.""" - if not time_zone: - if time_unit == "us": - return EPOCH + timedelta(microseconds=value) - elif time_unit == "ns": - return EPOCH + timedelta(microseconds=value // 1_000) - elif time_unit == "ms": - return EPOCH + timedelta(milliseconds=value) - else: - msg = f"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {time_unit!r}" - raise ValueError(msg) + """Convert an integer or float to a Python datetime object.""" + if time_unit == "us": + td = timedelta(microseconds=value) + elif time_unit == "ns": + td = timedelta(microseconds=value // 1_000) + elif time_unit == "ms": + td = timedelta(milliseconds=value) + else: + _raise_invalid_time_unit(time_unit) + + if time_zone is None: + return EPOCH + td elif _ZONEINFO_AVAILABLE: - if time_unit == "us": - dt = EPOCH_UTC + timedelta(microseconds=value) - elif time_unit == "ns": - dt = EPOCH_UTC + timedelta(microseconds=value // 1_000) - elif time_unit == "ms": - dt = EPOCH_UTC + timedelta(milliseconds=value) - else: - 
msg = f"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {time_unit!r}" - raise ValueError(msg) - return _localize(dt, time_zone) + dt = EPOCH_UTC + td + return _localize_datetime(dt, time_zone) else: msg = "install polars[timezone] to handle datetimes with time zone information" raise ImportError(msg) -def _localize(dt: datetime, time_zone: str) -> datetime: +def _localize_datetime(dt: datetime, time_zone: str) -> datetime: # zone info installation should already be checked - _tzinfo: ZoneInfo | tzinfo try: - _tzinfo = get_zoneinfo(time_zone) + tz = string_to_zoneinfo(time_zone) except zoneinfo.ZoneInfoNotFoundError: # try fixed offset, which is not supported by ZoneInfo - _tzinfo = _parse_fixed_tz_offset(time_zone) + tz = _parse_fixed_tz_offset(time_zone) - return dt.astimezone(_tzinfo) + return dt.astimezone(tz) -def _datetime_for_any_value(dt: datetime) -> tuple[int, int]: - """Used in PyO3 AnyValue conversion.""" - # returns (s, ms) - if dt.tzinfo is None: - return ( - _timestamp_in_seconds(dt.replace(tzinfo=timezone.utc)), - dt.microsecond, - ) - return (_timestamp_in_seconds(dt), dt.microsecond) - +@no_type_check +@lru_cache(None) +def string_to_zoneinfo(key: str) -> Any: + """ + Convert a time zone string to a Python ZoneInfo object. -def _datetime_for_any_value_windows(dt: datetime) -> tuple[float, int]: - """Used in PyO3 AnyValue conversion.""" - if dt.tzinfo is None: - dt = _localize(dt, "UTC") - # returns (s, ms) - return (_timestamp_in_seconds(dt), dt.microsecond) + This is a simple wrapper for the zoneinfo.ZoneInfo constructor. + The wrapper is useful because zoneinfo is not available on Python 3.8 + and the backports module may not be installed. + """ + return zoneinfo.ZoneInfo(key) # cache here as we have a single tz per column @@ -249,15 +212,26 @@ def _parse_fixed_tz_offset(offset: str) -> tzinfo: # minutes, then we can construct: # tzinfo=timezone(timedelta(hours=..., minutes=...)) except ValueError: - msg = f"offset: {offset!r} not understood" + msg = f"unexpected time zone offset: {offset!r}" raise ValueError(msg) from None return dt_offset.tzinfo # type: ignore[return-value] -def _to_python_decimal( - sign: int, digits: Sequence[int], prec: int, scale: int -) -> Decimal: +def to_py_timedelta(value: int | float, time_unit: TimeUnit) -> timedelta: + """Convert an integer or float to a Python timedelta object.""" + if time_unit == "us": + return timedelta(microseconds=value) + elif time_unit == "ns": + return timedelta(microseconds=value // 1_000) + elif time_unit == "ms": + return timedelta(milliseconds=value) + else: + _raise_invalid_time_unit(time_unit) + + +def to_py_decimal(sign: int, digits: Sequence[int], prec: int, scale: int) -> Decimal: + """Convert decimal components to a Python Decimal object.""" return _create_decimal_with_prec(prec)((sign, digits, scale)) @@ -267,3 +241,27 @@ def _create_decimal_with_prec( ) -> Callable[[tuple[int, Sequence[int], int]], Decimal]: # pre-cache contexts so we don't have to spend time on recreating them every time return Context(prec=precision).create_decimal + + +def _datetime_for_any_value(dt: datetime) -> tuple[int, int]: + """Used in PyO3 AnyValue conversion.""" + # returns (s, ms) + if dt.tzinfo is None: + return ( + _timestamp_in_seconds(dt.replace(tzinfo=timezone.utc)), + dt.microsecond, + ) + return (_timestamp_in_seconds(dt), dt.microsecond) + + +def _datetime_for_any_value_windows(dt: datetime) -> tuple[float, int]: + """Used in PyO3 AnyValue conversion.""" + if dt.tzinfo is None: + dt = _localize_datetime(dt, 
"UTC") + # returns (s, ms) + return (_timestamp_in_seconds(dt), dt.microsecond) + + +def _raise_invalid_time_unit(time_unit: Any) -> NoReturn: + msg = f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}" + raise ValueError(msg) diff --git a/py-polars/src/conversion/any_value.rs b/py-polars/src/conversion/any_value.rs index a66ec63d5354..1e327f2cf1f1 100644 --- a/py-polars/src/conversion/any_value.rs +++ b/py-polars/src/conversion/any_value.rs @@ -39,11 +39,11 @@ impl IntoPy for Wrap> { s.into_py(py) }, AnyValue::Date(v) => { - let convert = utils.getattr(intern!(py, "_to_python_date")).unwrap(); + let convert = utils.getattr(intern!(py, "to_py_date")).unwrap(); convert.call1((v,)).unwrap().into_py(py) }, AnyValue::Datetime(v, time_unit, time_zone) => { - let convert = utils.getattr(intern!(py, "_to_python_datetime")).unwrap(); + let convert = utils.getattr(intern!(py, "to_py_datetime")).unwrap(); let time_unit = time_unit.to_ascii(); convert .call1((v, time_unit, time_zone.as_ref().map(|s| s.as_str()))) @@ -51,12 +51,12 @@ impl IntoPy for Wrap> { .into_py(py) }, AnyValue::Duration(v, time_unit) => { - let convert = utils.getattr(intern!(py, "_to_python_timedelta")).unwrap(); + let convert = utils.getattr(intern!(py, "to_py_timedelta")).unwrap(); let time_unit = time_unit.to_ascii(); convert.call1((v, time_unit)).unwrap().into_py(py) }, AnyValue::Time(v) => { - let convert = utils.getattr(intern!(py, "_to_python_time")).unwrap(); + let convert = utils.getattr(intern!(py, "to_py_time")).unwrap(); convert.call1((v,)).unwrap().into_py(py) }, AnyValue::Array(v, _) | AnyValue::List(v) => PySeries::new(v).to_list(), @@ -75,7 +75,7 @@ impl IntoPy for Wrap> { AnyValue::Binary(v) => v.into_py(py), AnyValue::BinaryOwned(v) => v.into_py(py), AnyValue::Decimal(v, scale) => { - let convert = utils.getattr(intern!(py, "_to_python_decimal")).unwrap(); + let convert = utils.getattr(intern!(py, "to_py_decimal")).unwrap(); const N: usize = 3; let mut buf = [0_u128; N]; let n_digits = decimal_to_digits(v.abs(), &mut buf); @@ -209,7 +209,7 @@ impl<'s> FromPyObject<'s> for Wrap> { Python::with_gil(|py| { let td = UTILS .as_ref(py) - .getattr(intern!(py, "_timedelta_to_pl_timedelta")) + .getattr(intern!(py, "timedelta_to_int")) .unwrap() .call1((ob, intern!(py, "us"))) .unwrap(); @@ -222,7 +222,7 @@ impl<'s> FromPyObject<'s> for Wrap> { Python::with_gil(|py| { let time = UTILS .as_ref(py) - .getattr(intern!(py, "_time_to_pl_time")) + .getattr(intern!(py, "time_to_int")) .unwrap() .call1((ob,)) .unwrap(); @@ -348,7 +348,7 @@ fn convert_date(ob: &PyAny) -> PyResult> { Python::with_gil(|py| { let date = UTILS .as_ref(py) - .getattr(intern!(py, "_date_to_pl_date")) + .getattr(intern!(py, "date_to_int")) .unwrap() .call1((ob,)) .unwrap(); diff --git a/py-polars/src/conversion/chunked_array.rs b/py-polars/src/conversion/chunked_array.rs index 914b8a8017ea..c2e65a0613db 100644 --- a/py-polars/src/conversion/chunked_array.rs +++ b/py-polars/src/conversion/chunked_array.rs @@ -112,12 +112,12 @@ impl ToPyObject for Wrap<&StructChunked> { impl ToPyObject for Wrap<&DurationChunked> { fn to_object(&self, py: Python) -> PyObject { let utils = UTILS.as_ref(py); - let convert = utils.getattr(intern!(py, "_to_python_timedelta")).unwrap(); - let time_unit = Wrap(self.0.time_unit()).to_object(py); + let convert = utils.getattr(intern!(py, "to_py_timedelta")).unwrap(); + let time_unit = self.0.time_unit().to_ascii(); let iter = self .0 .into_iter() - .map(|opt_v| opt_v.map(|v| convert.call1((v, 
&time_unit)).unwrap())); + .map(|opt_v| opt_v.map(|v| convert.call1((v, time_unit)).unwrap())); PyList::new(py, iter).into_py(py) } } @@ -125,13 +125,13 @@ impl ToPyObject for Wrap<&DurationChunked> { impl ToPyObject for Wrap<&DatetimeChunked> { fn to_object(&self, py: Python) -> PyObject { let utils = UTILS.as_ref(py); - let convert = utils.getattr(intern!(py, "_to_python_datetime")).unwrap(); - let time_unit = Wrap(self.0.time_unit()).to_object(py); + let convert = utils.getattr(intern!(py, "to_py_datetime")).unwrap(); + let time_unit = self.0.time_unit().to_ascii(); let time_zone = self.0.time_zone().to_object(py); let iter = self .0 .into_iter() - .map(|opt_v| opt_v.map(|v| convert.call1((v, &time_unit, &time_zone)).unwrap())); + .map(|opt_v| opt_v.map(|v| convert.call1((v, time_unit, &time_zone)).unwrap())); PyList::new(py, iter).into_py(py) } } @@ -148,7 +148,7 @@ pub(crate) fn time_to_pyobject_iter<'a>( ca: &'a TimeChunked, ) -> impl ExactSizeIterator> { let utils = UTILS.as_ref(py); - let convert = utils.getattr(intern!(py, "_to_python_time")).unwrap(); + let convert = utils.getattr(intern!(py, "to_py_time")).unwrap(); ca.0.into_iter() .map(|opt_v| opt_v.map(|v| convert.call1((v,)).unwrap())) } @@ -156,7 +156,7 @@ pub(crate) fn time_to_pyobject_iter<'a>( impl ToPyObject for Wrap<&DateChunked> { fn to_object(&self, py: Python) -> PyObject { let utils = UTILS.as_ref(py); - let convert = utils.getattr(intern!(py, "_to_python_date")).unwrap(); + let convert = utils.getattr(intern!(py, "to_py_date")).unwrap(); let iter = self .0 .into_iter() @@ -177,7 +177,7 @@ pub(crate) fn decimal_to_pyobject_iter<'a>( ca: &'a DecimalChunked, ) -> impl ExactSizeIterator> { let utils = UTILS.as_ref(py); - let convert = utils.getattr(intern!(py, "_to_python_decimal")).unwrap(); + let convert = utils.getattr(intern!(py, "to_py_decimal")).unwrap(); let py_scale = (-(ca.scale() as i32)).to_object(py); // if we don't know precision, the only safe bet is to set it to 39 let py_precision = ca.precision().unwrap_or(39).to_object(py); diff --git a/py-polars/tests/parametric/test_groupby_rolling.py b/py-polars/tests/parametric/test_groupby_rolling.py index e8af621d1848..ed7f0b5d513c 100644 --- a/py-polars/tests/parametric/test_groupby_rolling.py +++ b/py-polars/tests/parametric/test_groupby_rolling.py @@ -11,7 +11,7 @@ from polars.testing import assert_frame_equal from polars.testing.parametric.primitives import column, dataframes from polars.testing.parametric.strategies import strategy_closed, strategy_time_unit -from polars.utils.convert import _timedelta_to_pl_duration +from polars.utils.convert import parse_as_duration_string if TYPE_CHECKING: from polars.type_aliases import ClosedInterval, TimeUnit @@ -20,10 +20,10 @@ @given( period=st.timedeltas( min_value=timedelta(microseconds=0), max_value=timedelta(days=1000) - ).map(_timedelta_to_pl_duration), + ).map(parse_as_duration_string), offset=st.timedeltas( min_value=timedelta(microseconds=0), max_value=timedelta(days=1000) - ).map(_timedelta_to_pl_duration), + ).map(parse_as_duration_string), closed=strategy_closed, data=st.data(), time_unit=strategy_time_unit, @@ -85,7 +85,7 @@ def test_rolling( @given( window_size=st.timedeltas( min_value=timedelta(microseconds=0), max_value=timedelta(days=2) - ).map(_timedelta_to_pl_duration), + ).map(parse_as_duration_string), closed=strategy_closed, data=st.data(), time_unit=strategy_time_unit, diff --git a/py-polars/tests/unit/constructors/test_constructors.py b/py-polars/tests/unit/constructors/test_constructors.py 
index 5e836cb0775b..ca57a8b6fa13 100644 --- a/py-polars/tests/unit/constructors/test_constructors.py +++ b/py-polars/tests/unit/constructors/test_constructors.py @@ -1,6 +1,5 @@ from __future__ import annotations -import sys from collections import OrderedDict, namedtuple from datetime import date, datetime, time, timedelta, timezone from decimal import Decimal @@ -15,7 +14,7 @@ import polars as pl from polars.datatypes import PolarsDataType, numpy_char_code_to_dtype -from polars.dependencies import _ZONEINFO_AVAILABLE, dataclasses, pydantic +from polars.dependencies import dataclasses, pydantic from polars.exceptions import TimeZoneAwareConstructorWarning from polars.testing import assert_frame_equal, assert_series_equal from polars.utils._construction import type_hints @@ -23,14 +22,11 @@ if TYPE_CHECKING: from collections.abc import Callable - from polars.datatypes import PolarsDataType - -if sys.version_info >= (3, 9): from zoneinfo import ZoneInfo -elif _ZONEINFO_AVAILABLE: - # Import from submodule due to typing issue with backports.zoneinfo package: - # https://github.com/pganssle/zoneinfo/issues/125 - from backports.zoneinfo._zoneinfo import ZoneInfo + + from polars.datatypes import PolarsDataType +else: + from polars.utils.convert import string_to_zoneinfo as ZoneInfo # ----------------------------------------------------------------------------------- diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index 339c2a6ab038..0de867b519b8 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -27,14 +27,11 @@ from polars.utils._construction import iterable_to_pydf if TYPE_CHECKING: - from polars.type_aliases import JoinStrategy, UniqueKeepStrategy - -if sys.version_info >= (3, 9): from zoneinfo import ZoneInfo + + from polars.type_aliases import JoinStrategy, UniqueKeepStrategy else: - # Import from submodule due to typing issue with backports.zoneinfo package: - # https://github.com/pganssle/zoneinfo/issues/125 - from backports.zoneinfo._zoneinfo import ZoneInfo + from polars.utils.convert import string_to_zoneinfo as ZoneInfo def test_version() -> None: diff --git a/py-polars/tests/unit/datatypes/test_temporal.py b/py-polars/tests/unit/datatypes/test_temporal.py index 3cddc1d7aaee..e3af046ad233 100644 --- a/py-polars/tests/unit/datatypes/test_temporal.py +++ b/py-polars/tests/unit/datatypes/test_temporal.py @@ -28,7 +28,7 @@ from polars.type_aliases import Ambiguous, PolarsTemporalType, TimeUnit else: - from polars.utils.convert import get_zoneinfo as ZoneInfo + from polars.utils.convert import string_to_zoneinfo as ZoneInfo def test_fill_null() -> None: @@ -1406,7 +1406,7 @@ def test_replace_time_zone() -> None: @pytest.mark.parametrize( ("to_tz", "tzinfo"), [ - ("America/Barbados", ZoneInfo(key="America/Barbados")), + ("America/Barbados", ZoneInfo("America/Barbados")), (None, None), ], ) @@ -1430,7 +1430,7 @@ def test_strptime_with_tz() -> None: .str.strptime(pl.Datetime("us", "Africa/Monrovia")) .item() ) - assert result == datetime(2020, 1, 1, 3, tzinfo=ZoneInfo(key="Africa/Monrovia")) + assert result == datetime(2020, 1, 1, 3, tzinfo=ZoneInfo("Africa/Monrovia")) @pytest.mark.parametrize( @@ -1496,7 +1496,7 @@ def test_convert_time_zone_lazy_schema() -> None: def test_convert_time_zone_on_tz_naive() -> None: ts = pl.Series(["2020-01-01"]).str.strptime(pl.Datetime) result = ts.dt.convert_time_zone("Asia/Kathmandu").item() - expected = datetime(2020, 1, 1, 5, 45, 
tzinfo=ZoneInfo(key="Asia/Kathmandu")) + expected = datetime(2020, 1, 1, 5, 45, tzinfo=ZoneInfo("Asia/Kathmandu")) assert result == expected result = ( ts.dt.replace_time_zone("UTC").dt.convert_time_zone("Asia/Kathmandu").item() @@ -1582,8 +1582,8 @@ def test_replace_time_zone_from_naive() -> None: pl.col("date").cast(pl.Datetime).dt.replace_time_zone("America/New_York") ).to_dict(as_series=False) == { "date": [ - datetime(2022, 1, 1, 0, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(2022, 1, 2, 0, 0, tzinfo=ZoneInfo(key="America/New_York")), + datetime(2022, 1, 1, 0, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(2022, 1, 2, 0, 0, tzinfo=ZoneInfo("America/New_York")), ] } @@ -1854,22 +1854,22 @@ def test_tz_aware_truncate() -> None: result = df.with_columns(pl.col("dt").dt.truncate("1d").alias("trunced")) expected = { "dt": [ - datetime(2022, 11, 1, 0, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(2022, 11, 1, 12, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(2022, 11, 2, 0, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(2022, 11, 2, 12, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(2022, 11, 3, 0, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(2022, 11, 3, 12, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(2022, 11, 4, 0, 0, tzinfo=ZoneInfo(key="America/New_York")), + datetime(2022, 11, 1, 0, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(2022, 11, 1, 12, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(2022, 11, 2, 0, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(2022, 11, 2, 12, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(2022, 11, 3, 0, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(2022, 11, 3, 12, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(2022, 11, 4, 0, 0, tzinfo=ZoneInfo("America/New_York")), ], "trunced": [ - datetime(2022, 11, 1, 0, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(2022, 11, 1, 0, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(2022, 11, 2, 0, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(2022, 11, 2, 0, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(2022, 11, 3, 0, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(2022, 11, 3, 0, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(2022, 11, 4, 0, 0, tzinfo=ZoneInfo(key="America/New_York")), + datetime(2022, 11, 1, 0, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(2022, 11, 1, 0, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(2022, 11, 2, 0, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(2022, 11, 2, 0, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(2022, 11, 3, 0, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(2022, 11, 3, 0, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(2022, 11, 4, 0, 0, tzinfo=ZoneInfo("America/New_York")), ], } assert result.to_dict(as_series=False) == expected @@ -1900,34 +1900,34 @@ def test_tz_aware_truncate() -> None: datetime(2022, 1, 1, 6, 0), ], "UTC": [ - datetime(2021, 12, 31, 23, 0, tzinfo=ZoneInfo(key="UTC")), - datetime(2022, 1, 1, 0, 0, tzinfo=ZoneInfo(key="UTC")), - datetime(2022, 1, 1, 1, 0, tzinfo=ZoneInfo(key="UTC")), - datetime(2022, 1, 1, 2, 0, tzinfo=ZoneInfo(key="UTC")), - datetime(2022, 1, 1, 3, 0, tzinfo=ZoneInfo(key="UTC")), - datetime(2022, 1, 1, 4, 0, tzinfo=ZoneInfo(key="UTC")), - datetime(2022, 1, 1, 5, 0, tzinfo=ZoneInfo(key="UTC")), - datetime(2022, 1, 1, 6, 0, tzinfo=ZoneInfo(key="UTC")), + datetime(2021, 12, 31, 23, 0, tzinfo=ZoneInfo("UTC")), + datetime(2022, 1, 1, 0, 0, 
tzinfo=ZoneInfo("UTC")), + datetime(2022, 1, 1, 1, 0, tzinfo=ZoneInfo("UTC")), + datetime(2022, 1, 1, 2, 0, tzinfo=ZoneInfo("UTC")), + datetime(2022, 1, 1, 3, 0, tzinfo=ZoneInfo("UTC")), + datetime(2022, 1, 1, 4, 0, tzinfo=ZoneInfo("UTC")), + datetime(2022, 1, 1, 5, 0, tzinfo=ZoneInfo("UTC")), + datetime(2022, 1, 1, 6, 0, tzinfo=ZoneInfo("UTC")), ], "CST": [ - datetime(2021, 12, 31, 17, 0, tzinfo=ZoneInfo(key="US/Central")), - datetime(2021, 12, 31, 18, 0, tzinfo=ZoneInfo(key="US/Central")), - datetime(2021, 12, 31, 19, 0, tzinfo=ZoneInfo(key="US/Central")), - datetime(2021, 12, 31, 20, 0, tzinfo=ZoneInfo(key="US/Central")), - datetime(2021, 12, 31, 21, 0, tzinfo=ZoneInfo(key="US/Central")), - datetime(2021, 12, 31, 22, 0, tzinfo=ZoneInfo(key="US/Central")), - datetime(2021, 12, 31, 23, 0, tzinfo=ZoneInfo(key="US/Central")), - datetime(2022, 1, 1, 0, 0, tzinfo=ZoneInfo(key="US/Central")), + datetime(2021, 12, 31, 17, 0, tzinfo=ZoneInfo("US/Central")), + datetime(2021, 12, 31, 18, 0, tzinfo=ZoneInfo("US/Central")), + datetime(2021, 12, 31, 19, 0, tzinfo=ZoneInfo("US/Central")), + datetime(2021, 12, 31, 20, 0, tzinfo=ZoneInfo("US/Central")), + datetime(2021, 12, 31, 21, 0, tzinfo=ZoneInfo("US/Central")), + datetime(2021, 12, 31, 22, 0, tzinfo=ZoneInfo("US/Central")), + datetime(2021, 12, 31, 23, 0, tzinfo=ZoneInfo("US/Central")), + datetime(2022, 1, 1, 0, 0, tzinfo=ZoneInfo("US/Central")), ], "CST truncated": [ - datetime(2021, 12, 31, 0, 0, tzinfo=ZoneInfo(key="US/Central")), - datetime(2021, 12, 31, 0, 0, tzinfo=ZoneInfo(key="US/Central")), - datetime(2021, 12, 31, 0, 0, tzinfo=ZoneInfo(key="US/Central")), - datetime(2021, 12, 31, 0, 0, tzinfo=ZoneInfo(key="US/Central")), - datetime(2021, 12, 31, 0, 0, tzinfo=ZoneInfo(key="US/Central")), - datetime(2021, 12, 31, 0, 0, tzinfo=ZoneInfo(key="US/Central")), - datetime(2021, 12, 31, 0, 0, tzinfo=ZoneInfo(key="US/Central")), - datetime(2022, 1, 1, 0, 0, tzinfo=ZoneInfo(key="US/Central")), + datetime(2021, 12, 31, 0, 0, tzinfo=ZoneInfo("US/Central")), + datetime(2021, 12, 31, 0, 0, tzinfo=ZoneInfo("US/Central")), + datetime(2021, 12, 31, 0, 0, tzinfo=ZoneInfo("US/Central")), + datetime(2021, 12, 31, 0, 0, tzinfo=ZoneInfo("US/Central")), + datetime(2021, 12, 31, 0, 0, tzinfo=ZoneInfo("US/Central")), + datetime(2021, 12, 31, 0, 0, tzinfo=ZoneInfo("US/Central")), + datetime(2021, 12, 31, 0, 0, tzinfo=ZoneInfo("US/Central")), + datetime(2022, 1, 1, 0, 0, tzinfo=ZoneInfo("US/Central")), ], } @@ -1956,10 +1956,10 @@ def test_tz_aware_to_string() -> None: result = df.with_columns(pl.col("dt").dt.to_string("%c").alias("fmt")) expected = { "dt": [ - datetime(2022, 11, 1, 0, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(2022, 11, 2, 0, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(2022, 11, 3, 0, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(2022, 11, 4, 0, 0, tzinfo=ZoneInfo(key="America/New_York")), + datetime(2022, 11, 1, 0, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(2022, 11, 2, 0, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(2022, 11, 3, 0, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(2022, 11, 4, 0, 0, tzinfo=ZoneInfo("America/New_York")), ], "fmt": [ "Tue Nov 1 00:00:00 2022", @@ -2017,12 +2017,12 @@ def test_tz_aware_filter_lit() -> None: datetime(1970, 1, 1, 5, 0), ], "nyc": [ - datetime(1970, 1, 1, 0, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(1970, 1, 1, 1, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(1970, 1, 1, 2, 0, tzinfo=ZoneInfo(key="America/New_York")), - 
datetime(1970, 1, 1, 3, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(1970, 1, 1, 4, 0, tzinfo=ZoneInfo(key="America/New_York")), - datetime(1970, 1, 1, 5, 0, tzinfo=ZoneInfo(key="America/New_York")), + datetime(1970, 1, 1, 0, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(1970, 1, 1, 1, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(1970, 1, 1, 2, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(1970, 1, 1, 3, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(1970, 1, 1, 4, 0, tzinfo=ZoneInfo("America/New_York")), + datetime(1970, 1, 1, 5, 0, tzinfo=ZoneInfo("America/New_York")), ], } @@ -2097,26 +2097,26 @@ def test_truncate_expr() -> None: ambiguous_expr = df.select(pl.col("date").dt.truncate(every=pl.lit("30m"))) assert ambiguous_expr.to_dict(as_series=False) == { "date": [ - datetime(2020, 10, 25, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 10, 25, 0, 30, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 10, 25, 1, 0, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 10, 25, 1, 30, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 10, 25, 1, 0, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 10, 25, 1, 30, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 10, 25, 2, 0, tzinfo=ZoneInfo(key="Europe/London")), + datetime(2020, 10, 25, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 25, 0, 30, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 25, 1, 0, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 25, 1, 30, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 25, 1, 0, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 25, 1, 30, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 25, 2, 0, tzinfo=ZoneInfo("Europe/London")), ] } all_expr = df.select(pl.col("date").dt.truncate(every=pl.col("every"))) assert all_expr.to_dict(as_series=False) == { "date": [ - datetime(2020, 10, 25, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 10, 25, 0, 45, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 10, 25, 1, 0, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 10, 25, 1, 45, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 10, 25, 1, 0, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 10, 25, 1, 45, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 10, 25, 2, 0, tzinfo=ZoneInfo(key="Europe/London")), + datetime(2020, 10, 25, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 25, 0, 45, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 25, 1, 0, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 25, 1, 45, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 25, 1, 0, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 25, 1, 45, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 25, 2, 0, tzinfo=ZoneInfo("Europe/London")), ] } @@ -2352,13 +2352,13 @@ def test_round_ambiguous() -> None: df = df.select(pl.col("date").dt.round("30m", ambiguous=pl.col("ambiguous"))) assert df.to_dict(as_series=False) == { "date": [ - datetime(2020, 10, 25, 0, 30, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 10, 25, 1, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 10, 25, 1, 30, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 10, 25, 1, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 10, 25, 1, 30, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 10, 25, 2, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 10, 25, 2, 30, tzinfo=ZoneInfo(key="Europe/London")), + datetime(2020, 10, 25, 0, 30, 
tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 25, 1, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 25, 1, 30, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 25, 1, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 25, 1, 30, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 25, 2, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 25, 2, 30, tzinfo=ZoneInfo("Europe/London")), ] } diff --git a/py-polars/tests/unit/expr/test_exprs.py b/py-polars/tests/unit/expr/test_exprs.py index 6e0be82f1557..fc5453035128 100644 --- a/py-polars/tests/unit/expr/test_exprs.py +++ b/py-polars/tests/unit/expr/test_exprs.py @@ -1,16 +1,8 @@ from __future__ import annotations -import sys from datetime import date, datetime, timedelta, timezone from itertools import permutations -from typing import Any, cast - -if sys.version_info >= (3, 9): - from zoneinfo import ZoneInfo -else: - # Import from submodule due to typing issue with backports.zoneinfo package: - # https://github.com/pganssle/zoneinfo/issues/125 - from backports.zoneinfo._zoneinfo import ZoneInfo +from typing import TYPE_CHECKING, Any, cast import numpy as np import pytest @@ -26,6 +18,11 @@ ) from polars.testing import assert_frame_equal, assert_series_equal +if TYPE_CHECKING: + from zoneinfo import ZoneInfo +else: + from polars.utils.convert import string_to_zoneinfo as ZoneInfo + def test_arg_true() -> None: df = pl.DataFrame({"a": [1, 1, 2, 1]}) diff --git a/py-polars/tests/unit/functions/as_datatype/test_as_datatype.py b/py-polars/tests/unit/functions/as_datatype/test_as_datatype.py index c1e266933f84..d0cafb850a61 100644 --- a/py-polars/tests/unit/functions/as_datatype/test_as_datatype.py +++ b/py-polars/tests/unit/functions/as_datatype/test_as_datatype.py @@ -13,7 +13,7 @@ from polars.type_aliases import TimeUnit else: - from polars.utils.convert import get_zoneinfo as ZoneInfo + from polars.utils.convert import string_to_zoneinfo as ZoneInfo def test_date_datetime() -> None: diff --git a/py-polars/tests/unit/functions/range/test_datetime_range.py b/py-polars/tests/unit/functions/range/test_datetime_range.py index 70d1cdda0c04..02150992d910 100644 --- a/py-polars/tests/unit/functions/range/test_datetime_range.py +++ b/py-polars/tests/unit/functions/range/test_datetime_range.py @@ -16,7 +16,7 @@ from polars.datatypes import PolarsDataType from polars.type_aliases import ClosedInterval, TimeUnit else: - from polars.utils.convert import get_zoneinfo as ZoneInfo + from polars.utils.convert import string_to_zoneinfo as ZoneInfo def test_datetime_range() -> None: @@ -166,13 +166,13 @@ def test_timezone_aware_datetime_range() -> None: assert pl.datetime_range( low, high, interval=timedelta(days=5), eager=True ).to_list() == [ - datetime(2022, 10, 17, 10, 0, tzinfo=ZoneInfo(key="Asia/Shanghai")), - datetime(2022, 10, 22, 10, 0, tzinfo=ZoneInfo(key="Asia/Shanghai")), - datetime(2022, 10, 27, 10, 0, tzinfo=ZoneInfo(key="Asia/Shanghai")), - datetime(2022, 11, 1, 10, 0, tzinfo=ZoneInfo(key="Asia/Shanghai")), - datetime(2022, 11, 6, 10, 0, tzinfo=ZoneInfo(key="Asia/Shanghai")), - datetime(2022, 11, 11, 10, 0, tzinfo=ZoneInfo(key="Asia/Shanghai")), - datetime(2022, 11, 16, 10, 0, tzinfo=ZoneInfo(key="Asia/Shanghai")), + datetime(2022, 10, 17, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")), + datetime(2022, 10, 22, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")), + datetime(2022, 10, 27, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")), + datetime(2022, 11, 1, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")), + datetime(2022, 11, 6, 10, 0, 
tzinfo=ZoneInfo("Asia/Shanghai")), + datetime(2022, 11, 11, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")), + datetime(2022, 11, 16, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")), ] with pytest.raises( diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py index fd733b228b69..1f6fc9138581 100644 --- a/py-polars/tests/unit/namespaces/test_datetime.py +++ b/py-polars/tests/unit/namespaces/test_datetime.py @@ -1,6 +1,5 @@ from __future__ import annotations -import sys from datetime import date, datetime, time, timedelta from typing import TYPE_CHECKING @@ -8,19 +7,15 @@ import polars as pl from polars.datatypes import DTYPE_TEMPORAL_UNITS -from polars.dependencies import _ZONEINFO_AVAILABLE from polars.exceptions import ComputeError, InvalidOperationError from polars.testing import assert_frame_equal, assert_series_equal -if sys.version_info >= (3, 9): +if TYPE_CHECKING: from zoneinfo import ZoneInfo -elif _ZONEINFO_AVAILABLE: - # Import from submodule due to typing issue with backports.zoneinfo package: - # https://github.com/pganssle/zoneinfo/issues/125 - from backports.zoneinfo._zoneinfo import ZoneInfo -if TYPE_CHECKING: from polars.type_aliases import TemporalLiteral, TimeUnit +else: + from polars.utils.convert import string_to_zoneinfo as ZoneInfo @pytest.fixture() @@ -782,9 +777,9 @@ def test_offset_by_broadcasting() -> None: None, ], "d3": [ - datetime(2020, 10, 26, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 11, 4, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2020, 10, 28, tzinfo=ZoneInfo(key="Europe/London")), + datetime(2020, 10, 26, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 11, 4, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 28, tzinfo=ZoneInfo("Europe/London")), None, ], "d4": [ @@ -811,8 +806,8 @@ def test_offset_by_broadcasting() -> None: "d1": [datetime(2020, 11, 28), datetime(2021, 2, 5), None], "d2": [datetime(2021, 11, 25), datetime(2022, 2, 2), None], "d3": [ - datetime(2020, 10, 28, tzinfo=ZoneInfo(key="Europe/London")), - datetime(2021, 1, 5, tzinfo=ZoneInfo(key="Europe/London")), + datetime(2020, 10, 28, tzinfo=ZoneInfo("Europe/London")), + datetime(2021, 1, 5, tzinfo=ZoneInfo("Europe/London")), None, ], "d4": [datetime(2021, 11, 26).date(), datetime(2022, 2, 3).date(), None], diff --git a/py-polars/tests/unit/namespaces/test_strptime.py b/py-polars/tests/unit/namespaces/test_strptime.py index cba398d2d7d7..0d7a8fba8914 100644 --- a/py-polars/tests/unit/namespaces/test_strptime.py +++ b/py-polars/tests/unit/namespaces/test_strptime.py @@ -20,7 +20,7 @@ from polars.type_aliases import PolarsTemporalType, TimeUnit else: - from polars.utils.convert import get_zoneinfo as ZoneInfo + from polars.utils.convert import string_to_zoneinfo as ZoneInfo def test_str_strptime() -> None: @@ -505,8 +505,8 @@ def test_to_datetime_tz_aware_strptime(ts: str, fmt: str, expected: datetime) -> def test_crossing_dst(format: str) -> None: ts = ["2021-03-27T23:59:59+01:00", "2021-03-28T23:59:59+02:00"] result = pl.Series(ts).str.to_datetime(format) - assert result[0] == datetime(2021, 3, 27, 22, 59, 59, tzinfo=ZoneInfo(key="UTC")) - assert result[1] == datetime(2021, 3, 28, 21, 59, 59, tzinfo=ZoneInfo(key="UTC")) + assert result[0] == datetime(2021, 3, 27, 22, 59, 59, tzinfo=ZoneInfo("UTC")) + assert result[1] == datetime(2021, 3, 28, 21, 59, 59, tzinfo=ZoneInfo("UTC")) @pytest.mark.parametrize("format", ["%+", "%Y-%m-%dT%H:%M:%S%z"]) diff --git a/py-polars/tests/unit/operations/test_group_by_dynamic.py 
b/py-polars/tests/unit/operations/test_group_by_dynamic.py index 9404b22ea52a..bb3b5fa1f514 100644 --- a/py-polars/tests/unit/operations/test_group_by_dynamic.py +++ b/py-polars/tests/unit/operations/test_group_by_dynamic.py @@ -1,6 +1,5 @@ from __future__ import annotations -import sys from datetime import date, datetime, timedelta from typing import TYPE_CHECKING, Any @@ -10,15 +9,12 @@ import polars as pl from polars.testing import assert_frame_equal -if sys.version_info >= (3, 9): +if TYPE_CHECKING: from zoneinfo import ZoneInfo -else: - # Import from submodule due to typing issue with backports.zoneinfo package: - # https://github.com/pganssle/zoneinfo/issues/125 - from backports.zoneinfo._zoneinfo import ZoneInfo -if TYPE_CHECKING: from polars.type_aliases import Label, StartBy +else: + from polars.utils.convert import string_to_zoneinfo as ZoneInfo @pytest.mark.parametrize( diff --git a/py-polars/tests/unit/series/test_series.py b/py-polars/tests/unit/series/test_series.py index 32e60e8dfc5e..8b48beb5edce 100644 --- a/py-polars/tests/unit/series/test_series.py +++ b/py-polars/tests/unit/series/test_series.py @@ -36,7 +36,7 @@ from polars.type_aliases import EpochTimeUnit, PolarsDataType, TimeUnit else: - from polars.utils.convert import get_zoneinfo as ZoneInfo + from polars.utils.convert import string_to_zoneinfo as ZoneInfo def test_cum_agg() -> None: diff --git a/py-polars/tests/unit/utils/test_utils.py b/py-polars/tests/unit/utils/test_utils.py index 1b5e79a40586..4b716c1fa4f2 100644 --- a/py-polars/tests/unit/utils/test_utils.py +++ b/py-polars/tests/unit/utils/test_utils.py @@ -9,11 +9,11 @@ import polars as pl from polars.io._utils import _looks_like_url from polars.utils.convert import ( - _date_to_pl_date, - _datetime_to_pl_timestamp, - _time_to_pl_time, - _timedelta_to_pl_duration, - _timedelta_to_pl_timedelta, + date_to_int, + datetime_to_int, + parse_as_duration_string, + time_to_int, + timedelta_to_int, ) from polars.utils.various import ( _in_notebook, @@ -30,23 +30,41 @@ @pytest.mark.parametrize( - ("dt", "time_unit", "expected"), + ("td", "expected"), [ - (datetime(2121, 1, 1), "ns", 4_765_132_800_000_000_000), - (datetime(2121, 1, 1), "us", 4_765_132_800_000_000), - (datetime(2121, 1, 1), "ms", 4_765_132_800_000), - (datetime(2121, 1, 1), None, 4_765_132_800_000_000), - (datetime.min, "ns", -62_135_596_800_000_000_000), - (datetime.max, "ns", 253_402_300_799_999_999_000), - (datetime.min, "ms", -62_135_596_800_000), - (datetime.max, "ms", 253_402_300_799_999), + (timedelta(), ""), + (timedelta(days=1), "1d"), + (timedelta(days=-1), "-1d"), + (timedelta(seconds=1), "1s"), + (timedelta(seconds=-1), "-1s"), + (timedelta(microseconds=1), "1us"), + (timedelta(microseconds=-1), "-1us"), + (timedelta(days=1, seconds=1), "1d1s"), + (timedelta(minutes=-1, seconds=1), "-59s"), + (timedelta(days=-1, seconds=-1), "-1d1s"), + (timedelta(days=1, microseconds=1), "1d1us"), + (timedelta(days=-1, microseconds=-1), "-1d1us"), + (None, None), + ("1d2s", "1d2s"), ], ) -def test_datetime_to_pl_timestamp( - dt: datetime, time_unit: TimeUnit | None, expected: int +def test_parse_as_duration_string( + td: timedelta | str | None, expected: str | None ) -> None: - out = _datetime_to_pl_timestamp(dt, time_unit) - assert out == expected + assert parse_as_duration_string(td) == expected + + +@pytest.mark.parametrize( + ("d", "expected"), + [ + (date(1999, 9, 9), 10_843), + (date(1969, 12, 31), -1), + (date.min, -719_162), + (date.max, 2_932_896), + ], +) +def test_date_to_int(d: date, expected: 
int) -> None: + assert date_to_int(d) == expected @pytest.mark.parametrize( @@ -59,21 +77,24 @@ def test_datetime_to_pl_timestamp( (time.max, 86_399_999_999_000), ], ) -def test_time_to_pl_time(t: time, expected: int) -> None: - assert _time_to_pl_time(t) == expected +def test_time_to_int(t: time, expected: int) -> None: + assert time_to_int(t) == expected @pytest.mark.parametrize( - ("d", "expected"), + ("dt", "time_unit", "expected"), [ - (date(1999, 9, 9), 10_843), - (date(1969, 12, 31), -1), - (date.min, -719_162), - (date.max, 2_932_896), + (datetime(2121, 1, 1), "ns", 4_765_132_800_000_000_000), + (datetime(2121, 1, 1), "us", 4_765_132_800_000_000), + (datetime(2121, 1, 1), "ms", 4_765_132_800_000), + (datetime.min, "ns", -62_135_596_800_000_000_000), + (datetime.max, "ns", 253_402_300_799_999_999_000), + (datetime.min, "ms", -62_135_596_800_000), + (datetime.max, "ms", 253_402_300_799_999), ], ) -def test_date_to_pl_date(d: date, expected: int) -> None: - assert _date_to_pl_date(d) == expected +def test_datetime_to_int(dt: datetime, time_unit: TimeUnit, expected: int) -> None: + assert datetime_to_int(dt, time_unit) == expected @pytest.mark.parametrize( @@ -82,37 +103,14 @@ def test_date_to_pl_date(d: date, expected: int) -> None: (timedelta(days=1), "ns", 86_400_000_000_000), (timedelta(days=1), "us", 86_400_000_000), (timedelta(days=1), "ms", 86_400_000), - (timedelta(days=1), None, 86_400_000_000), (timedelta.min, "ns", -86_399_999_913_600_000_000_000), (timedelta.max, "ns", 86_399_999_999_999_999_999_000), (timedelta.min, "ms", -86_399_999_913_600_000), (timedelta.max, "ms", 86_399_999_999_999_999), ], ) -def test_timedelta_to_pl_timedelta( - td: timedelta, time_unit: TimeUnit | None, expected: int -) -> None: - assert _timedelta_to_pl_timedelta(td, time_unit) == expected - - -@pytest.mark.parametrize( - ("td", "expected"), - [ - (timedelta(days=1), "1d"), - (timedelta(days=-1), "-1d"), - (timedelta(seconds=1), "1s"), - (timedelta(seconds=-1), "-1s"), - (timedelta(microseconds=1), "1us"), - (timedelta(microseconds=-1), "-1us"), - (timedelta(days=1, seconds=1), "1d1s"), - (timedelta(days=-1, seconds=-1), "-1d1s"), - (timedelta(days=1, microseconds=1), "1d1us"), - (timedelta(days=-1, microseconds=-1), "-1d1us"), - ], -) -def test_timedelta_to_pl_duration(td: timedelta, expected: str) -> None: - out = _timedelta_to_pl_duration(td) - assert out == expected +def test_timedelta_to_int(td: timedelta, time_unit: TimeUnit, expected: int) -> None: + assert timedelta_to_int(td, time_unit) == expected def test_estimated_size() -> None:
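
A minimal usage sketch of the renamed conversion helpers and the new `string_to_zoneinfo` import target, assembled from the parametrized test cases and test-module imports above; the physical units noted in the comments are inferred from the expected values in the tests rather than stated explicitly in the patch:

    from datetime import date, datetime, time, timedelta

    from polars.utils.convert import (
        date_to_int,
        datetime_to_int,
        parse_as_duration_string,
        string_to_zoneinfo,
        time_to_int,
        timedelta_to_int,
    )

    # Duration-like inputs normalize to Polars duration strings;
    # strings and None pass through unchanged.
    assert parse_as_duration_string(timedelta(days=1, seconds=1)) == "1d1s"
    assert parse_as_duration_string("1d2s") == "1d2s"
    assert parse_as_duration_string(None) is None

    # Temporal values convert to plain integers: days since the UNIX epoch
    # for dates, nanoseconds since midnight for times (inferred from the
    # time.max case), and the requested time unit for datetimes/timedeltas.
    assert date_to_int(date(1969, 12, 31)) == -1
    assert time_to_int(time.max) == 86_399_999_999_000
    assert datetime_to_int(datetime(2121, 1, 1), "ms") == 4_765_132_800_000
    assert timedelta_to_int(timedelta(days=1), "us") == 86_400_000_000

    # The test modules resolve ZoneInfo at runtime through the new helper
    # (typed as zoneinfo.ZoneInfo under TYPE_CHECKING), so expected values
    # can be built as timezone-aware datetimes without a direct zoneinfo import.
    tz = string_to_zoneinfo("Europe/London")
    expected = datetime(2020, 10, 25, tzinfo=tz)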