Skip to content

Commit

Permalink
refactor(python): Refactor utils/convert.py module (#14739)
Browse files: browse the repository at this point in the history
  • Loading branch information
stinodego authored and ritchie46 committed Feb 28, 2024
1 parent 676c506 commit 7231b44
Show file tree
Hide file tree
Showing 29 changed files with 430 additions and 452 deletions.
20 changes: 10 additions & 10 deletions crates/polars-plan/src/logical_plan/pyarrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ pub(super) struct Args {
}

fn to_py_datetime(v: i64, tu: &TimeUnit, tz: Option<&TimeZone>) -> String {
// note: `_to_python_datetime` and the `Datetime`
// note: `to_py_datetime` and the `Datetime`
// dtype have to be in-scope on the python side
match tz {
None => format!("_to_python_datetime({},'{}')", v, tu.to_ascii()),
Some(tz) => format!("_to_python_datetime({},'{}',{})", v, tu.to_ascii(), tz),
None => format!("to_py_datetime({},'{}')", v, tu.to_ascii()),
Some(tz) => format!("to_py_datetime({},'{}',{})", v, tu.to_ascii(), tz),
}
}

Expand Down Expand Up @@ -53,7 +53,7 @@ pub(super) fn predicate_to_pa(
let dtm = to_py_datetime(v, &tu, tz.as_ref());
write!(list_repr, "{dtm},").unwrap();
} else if let AnyValue::Date(v) = av {
write!(list_repr, "_to_python_date({v}),").unwrap();
write!(list_repr, "to_py_date({v}),").unwrap();
} else {
write!(list_repr, "{av},").unwrap();
}
Expand All @@ -79,25 +79,25 @@ pub(super) fn predicate_to_pa(
},
#[cfg(feature = "dtype-date")]
AnyValue::Date(v) => {
// the function `_to_python_date` and the `Date`
// the function `to_py_date` and the `Date`
// dtype have to be in scope on the python side
Some(format!("_to_python_date({v})"))
Some(format!("to_py_date({v})"))
},
#[cfg(feature = "dtype-datetime")]
AnyValue::Datetime(v, tu, tz) => Some(to_py_datetime(v, &tu, tz.as_ref())),
// Activate once pyarrow supports them
// #[cfg(feature = "dtype-time")]
// AnyValue::Time(v) => {
// // the function `_to_python_time` has to be in scope
// // the function `to_py_time` has to be in scope
// // on the python side
// Some(format!("_to_python_time(value={v})"))
// Some(format!("to_py_time(value={v})"))
// }
// #[cfg(feature = "dtype-duration")]
// AnyValue::Duration(v, tu) => {
// // the function `_to_python_timedelta` has to be in scope
// // the function `to_py_timedelta` has to be in scope
// // on the python side
// Some(format!(
// "_to_python_timedelta(value={}, tu='{}')",
// "to_py_timedelta(value={}, tu='{}')",
// v,
// tu.to_ascii()
// ))
Expand Down
6 changes: 3 additions & 3 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
)
from polars.utils._parse_expr_input import parse_as_expression
from polars.utils._wrap import wrap_expr, wrap_ldf, wrap_s
from polars.utils.convert import _timedelta_to_pl_duration
from polars.utils.convert import parse_as_duration_string
from polars.utils.deprecation import (
deprecate_function,
deprecate_nonkeyword_arguments,
Expand Down Expand Up @@ -6061,8 +6061,8 @@ def upsample(
if offset is None:
offset = "0ns"

every = _timedelta_to_pl_duration(every)
offset = _timedelta_to_pl_duration(offset)
every = parse_as_duration_string(every)
offset = parse_as_duration_string(offset)

return self._from_pydf(
self._df.upsample(by, time_column, every, offset, maintain_order)
Expand Down
12 changes: 6 additions & 6 deletions py-polars/polars/dataframe/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import TYPE_CHECKING, Callable, Iterable, Iterator

from polars import functions as F
from polars.utils.convert import _timedelta_to_pl_duration
from polars.utils.convert import parse_as_duration_string
from polars.utils.deprecation import (
deprecate_renamed_function,
issue_deprecation_warning,
Expand Down Expand Up @@ -792,8 +792,8 @@ def __init__(
by: IntoExpr | Iterable[IntoExpr] | None,
check_sorted: bool,
):
period = _timedelta_to_pl_duration(period)
offset = _timedelta_to_pl_duration(offset)
period = parse_as_duration_string(period)
offset = parse_as_duration_string(offset)

self.df = df
self.time_column = index_column
Expand Down Expand Up @@ -969,9 +969,9 @@ def __init__(
start_by: StartBy,
check_sorted: bool,
):
every = _timedelta_to_pl_duration(every)
period = _timedelta_to_pl_duration(period)
offset = _timedelta_to_pl_duration(offset)
every = parse_as_duration_string(every)
period = parse_as_duration_string(period)
offset = parse_as_duration_string(offset)

self.df = df
self.time_column = index_column
Expand Down
15 changes: 8 additions & 7 deletions py-polars/polars/datatypes/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
if TYPE_CHECKING:
from typing import Literal

from polars.type_aliases import PolarsDataType, PythonDataType, SchemaDict
from polars.type_aliases import PolarsDataType, PythonDataType, SchemaDict, TimeUnit


PY_STR_TO_DTYPE: SchemaDict = {
Expand Down Expand Up @@ -486,16 +486,17 @@ def maybe_cast(el: Any, dtype: PolarsDataType) -> Any:
"""Try casting a value to a value that is valid for the given Polars dtype."""
# cast el if it doesn't match
from polars.utils.convert import (
_datetime_to_pl_timestamp,
_timedelta_to_pl_timedelta,
datetime_to_int,
timedelta_to_int,
)

time_unit: TimeUnit
if isinstance(el, datetime):
time_unit = getattr(dtype, "time_unit", None)
return _datetime_to_pl_timestamp(el, time_unit)
time_unit = getattr(dtype, "time_unit", "us")
return datetime_to_int(el, time_unit)
elif isinstance(el, timedelta):
time_unit = getattr(dtype, "time_unit", None)
return _timedelta_to_pl_timedelta(el, time_unit)
time_unit = getattr(dtype, "time_unit", "us")
return timedelta_to_int(el, time_unit)

py_type = dtype_to_py_type(dtype)
if not isinstance(el, py_type):
Expand Down
10 changes: 5 additions & 5 deletions py-polars/polars/expr/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from polars.datatypes import DTYPE_TEMPORAL_UNITS, Date, Int32
from polars.utils._parse_expr_input import parse_as_expression
from polars.utils._wrap import wrap_expr
from polars.utils.convert import _timedelta_to_pl_duration
from polars.utils.convert import parse_as_duration_string
from polars.utils.deprecation import (
deprecate_function,
deprecate_renamed_function,
Expand Down Expand Up @@ -183,7 +183,7 @@ def truncate(
every = deprecate_saturating(every)
offset = deprecate_saturating(offset)
if not isinstance(every, pl.Expr):
every = _timedelta_to_pl_duration(every)
every = parse_as_duration_string(every)

if use_earliest is not None:
issue_deprecation_warning(
Expand All @@ -203,7 +203,7 @@ def truncate(
return wrap_expr(
self._pyexpr.dt_truncate(
every,
_timedelta_to_pl_duration(offset),
parse_as_duration_string(offset),
)
)

Expand Down Expand Up @@ -340,8 +340,8 @@ def round(

return wrap_expr(
self._pyexpr.dt_round(
_timedelta_to_pl_duration(every),
_timedelta_to_pl_duration(offset),
parse_as_duration_string(every),
parse_as_duration_string(offset),
)
)

Expand Down
10 changes: 5 additions & 5 deletions py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
parse_as_list_of_expressions,
parse_predicates_constraints_as_expression,
)
from polars.utils.convert import _negate_duration, _timedelta_to_pl_duration
from polars.utils.convert import negate_duration_string, parse_as_duration_string
from polars.utils.deprecation import (
deprecate_function,
deprecate_nonkeyword_arguments,
Expand Down Expand Up @@ -3420,10 +3420,10 @@ def rolling(
period = deprecate_saturating(period)
offset = deprecate_saturating(offset)
if offset is None:
offset = _negate_duration(_timedelta_to_pl_duration(period))
offset = negate_duration_string(parse_as_duration_string(period))

period = _timedelta_to_pl_duration(period)
offset = _timedelta_to_pl_duration(offset)
period = parse_as_duration_string(period)
offset = parse_as_duration_string(offset)

return self._from_pyexpr(
self._pyexpr.rolling(index_column, period, offset, closed, check_sorted)
Expand Down Expand Up @@ -10086,7 +10086,7 @@ def _prepare_rolling_window_args(
min_periods = window_size
window_size = f"{window_size}i"
elif isinstance(window_size, timedelta):
window_size = _timedelta_to_pl_duration(window_size)
window_size = parse_as_duration_string(window_size)
if min_periods is None:
min_periods = 1
return window_size, min_periods
16 changes: 8 additions & 8 deletions py-polars/polars/functions/lit.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
from polars.dependencies import numpy as np
from polars.utils._wrap import wrap_expr
from polars.utils.convert import (
_date_to_pl_date,
_datetime_to_pl_timestamp,
_time_to_pl_time,
_timedelta_to_pl_timedelta,
date_to_int,
datetime_to_int,
time_to_int,
timedelta_to_int,
)

with contextlib.suppress(ImportError): # Module not available when building docs
Expand Down Expand Up @@ -91,7 +91,7 @@ def lit(
time_zone = tzinfo_str

dt_utc = value.replace(tzinfo=timezone.utc)
dt_int = _datetime_to_pl_timestamp(dt_utc, time_unit)
dt_int = datetime_to_int(dt_utc, time_unit)
expr = lit(dt_int).cast(Datetime(time_unit))
if time_zone is not None:
expr = expr.dt.replace_time_zone(
Expand All @@ -105,15 +105,15 @@ def lit(
else:
time_unit = "us"

td_int = _timedelta_to_pl_timedelta(value, time_unit)
td_int = timedelta_to_int(value, time_unit)
return lit(td_int).cast(Duration(time_unit))

elif isinstance(value, time):
time_int = _time_to_pl_time(value)
time_int = time_to_int(value)
return lit(time_int).cast(Time)

elif isinstance(value, date):
date_int = _date_to_pl_date(value)
date_int = date_to_int(value)
return lit(date_int).cast(Date)

elif isinstance(value, pl.Series):
Expand Down
4 changes: 2 additions & 2 deletions py-polars/polars/functions/range/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

from datetime import timedelta

from polars.utils.convert import _timedelta_to_pl_duration
from polars.utils.convert import parse_as_duration_string


def parse_interval_argument(interval: str | timedelta) -> str:
"""Parse the interval argument as a Polars duration string."""
if isinstance(interval, timedelta):
return _timedelta_to_pl_duration(interval)
return parse_as_duration_string(interval)

if " " in interval:
interval = interval.replace(" ", "")
Expand Down
6 changes: 3 additions & 3 deletions py-polars/polars/io/iceberg.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

import polars._reexport as pl
from polars.dependencies import pyiceberg
from polars.utils.convert import _to_python_date, _to_python_datetime
from polars.utils.convert import to_py_date, to_py_datetime

if TYPE_CHECKING:
from datetime import date, datetime
Expand All @@ -34,8 +34,8 @@
__all__ = ["scan_iceberg"]

_temporal_conversions: dict[str, Callable[..., datetime | date]] = {
"_to_python_date": _to_python_date,
"_to_python_datetime": _to_python_datetime,
"to_py_date": to_py_date,
"to_py_datetime": to_py_datetime,
}


Expand Down
16 changes: 8 additions & 8 deletions py-polars/polars/io/pyarrow_dataset/anonymous_scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,10 @@ def _scan_pyarrow_dataset_impl(
if predicate:
from polars.datatypes import Date, Datetime, Duration
from polars.utils.convert import (
_to_python_date,
_to_python_datetime,
_to_python_time,
_to_python_timedelta,
to_py_date,
to_py_datetime,
to_py_time,
to_py_timedelta,
)

_filter = eval(
Expand All @@ -86,10 +86,10 @@ def _scan_pyarrow_dataset_impl(
"Date": Date,
"Datetime": Datetime,
"Duration": Duration,
"_to_python_date": _to_python_date,
"_to_python_datetime": _to_python_datetime,
"_to_python_time": _to_python_time,
"_to_python_timedelta": _to_python_timedelta,
"to_py_date": to_py_date,
"to_py_datetime": to_py_datetime,
"to_py_time": to_py_time,
"to_py_timedelta": to_py_timedelta,
},
)

Expand Down
18 changes: 9 additions & 9 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
parse_as_list_of_expressions,
)
from polars.utils._wrap import wrap_df, wrap_expr
from polars.utils.convert import _negate_duration, _timedelta_to_pl_duration
from polars.utils.convert import negate_duration_string, parse_as_duration_string
from polars.utils.deprecation import (
deprecate_function,
deprecate_parameter_as_positional,
Expand Down Expand Up @@ -3357,11 +3357,11 @@ def rolling(
offset = deprecate_saturating(offset)
index_column = parse_as_expression(index_column)
if offset is None:
offset = _negate_duration(_timedelta_to_pl_duration(period))
offset = negate_duration_string(parse_as_duration_string(period))

pyexprs_by = parse_as_list_of_expressions(by) if by is not None else []
period = _timedelta_to_pl_duration(period)
offset = _timedelta_to_pl_duration(offset)
period = parse_as_duration_string(period)
offset = parse_as_duration_string(offset)

lgb = self._ldf.rolling(
index_column, period, offset, closed, pyexprs_by, check_sorted
Expand Down Expand Up @@ -3703,14 +3703,14 @@ def group_by_dynamic(

index_column = parse_as_expression(index_column)
if offset is None:
offset = _negate_duration(_timedelta_to_pl_duration(every))
offset = negate_duration_string(parse_as_duration_string(every))

if period is None:
period = every

period = _timedelta_to_pl_duration(period)
offset = _timedelta_to_pl_duration(offset)
every = _timedelta_to_pl_duration(every)
period = parse_as_duration_string(period)
offset = parse_as_duration_string(offset)
every = parse_as_duration_string(every)

pyexprs_by = parse_as_list_of_expressions(by) if by is not None else []
lgb = self._ldf.group_by_dynamic(
Expand Down Expand Up @@ -3887,7 +3887,7 @@ def join_asof(
if isinstance(tolerance, str):
tolerance_str = tolerance
elif isinstance(tolerance, timedelta):
tolerance_str = _timedelta_to_pl_duration(tolerance)
tolerance_str = parse_as_duration_string(tolerance)
else:
tolerance_num = tolerance

Expand Down
Loading

0 comments on commit 7231b44

Please sign in to comment.