Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

refactor(python): Refactor utils/convert.py module #14739

Merged
merged 24 commits into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions crates/polars-plan/src/logical_plan/pyarrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ pub(super) struct Args {
}

fn to_py_datetime(v: i64, tu: &TimeUnit, tz: Option<&TimeZone>) -> String {
// note: `_to_python_datetime` and the `Datetime`
// note: `to_py_datetime` and the `Datetime`
// dtype have to be in-scope on the python side
match tz {
None => format!("_to_python_datetime({},'{}')", v, tu.to_ascii()),
Some(tz) => format!("_to_python_datetime({},'{}',{})", v, tu.to_ascii(), tz),
None => format!("to_py_datetime({},'{}')", v, tu.to_ascii()),
Some(tz) => format!("to_py_datetime({},'{}',{})", v, tu.to_ascii(), tz),
}
}

Expand Down Expand Up @@ -53,7 +53,7 @@ pub(super) fn predicate_to_pa(
let dtm = to_py_datetime(v, &tu, tz.as_ref());
write!(list_repr, "{dtm},").unwrap();
} else if let AnyValue::Date(v) = av {
write!(list_repr, "_to_python_date({v}),").unwrap();
write!(list_repr, "to_py_date({v}),").unwrap();
} else {
write!(list_repr, "{av},").unwrap();
}
Expand All @@ -79,25 +79,25 @@ pub(super) fn predicate_to_pa(
},
#[cfg(feature = "dtype-date")]
AnyValue::Date(v) => {
// the function `_to_python_date` and the `Date`
// the function `to_py_date` and the `Date`
// dtype have to be in scope on the python side
Some(format!("_to_python_date({v})"))
Some(format!("to_py_date({v})"))
},
#[cfg(feature = "dtype-datetime")]
AnyValue::Datetime(v, tu, tz) => Some(to_py_datetime(v, &tu, tz.as_ref())),
// Activate once pyarrow supports them
// #[cfg(feature = "dtype-time")]
// AnyValue::Time(v) => {
// // the function `_to_python_time` has to be in scope
// // the function `to_py_time` has to be in scope
// // on the python side
// Some(format!("_to_python_time(value={v})"))
// Some(format!("to_py_time(value={v})"))
// }
// #[cfg(feature = "dtype-duration")]
// AnyValue::Duration(v, tu) => {
// // the function `_to_python_timedelta` has to be in scope
// // the function `to_py_timedelta` has to be in scope
// // on the python side
// Some(format!(
// "_to_python_timedelta(value={}, tu='{}')",
// "to_py_timedelta(value={}, tu='{}')",
// v,
// tu.to_ascii()
// ))
Expand Down
6 changes: 3 additions & 3 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
)
from polars.utils._parse_expr_input import parse_as_expression
from polars.utils._wrap import wrap_expr, wrap_ldf, wrap_s
from polars.utils.convert import _timedelta_to_pl_duration
from polars.utils.convert import parse_as_duration_string
from polars.utils.deprecation import (
deprecate_function,
deprecate_nonkeyword_arguments,
Expand Down Expand Up @@ -6061,8 +6061,8 @@ def upsample(
if offset is None:
offset = "0ns"

every = _timedelta_to_pl_duration(every)
offset = _timedelta_to_pl_duration(offset)
every = parse_as_duration_string(every)
offset = parse_as_duration_string(offset)

return self._from_pydf(
self._df.upsample(by, time_column, every, offset, maintain_order)
Expand Down
12 changes: 6 additions & 6 deletions py-polars/polars/dataframe/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import TYPE_CHECKING, Callable, Iterable, Iterator

from polars import functions as F
from polars.utils.convert import _timedelta_to_pl_duration
from polars.utils.convert import parse_as_duration_string
from polars.utils.deprecation import (
deprecate_renamed_function,
issue_deprecation_warning,
Expand Down Expand Up @@ -792,8 +792,8 @@ def __init__(
by: IntoExpr | Iterable[IntoExpr] | None,
check_sorted: bool,
):
period = _timedelta_to_pl_duration(period)
offset = _timedelta_to_pl_duration(offset)
period = parse_as_duration_string(period)
offset = parse_as_duration_string(offset)

self.df = df
self.time_column = index_column
Expand Down Expand Up @@ -969,9 +969,9 @@ def __init__(
start_by: StartBy,
check_sorted: bool,
):
every = _timedelta_to_pl_duration(every)
period = _timedelta_to_pl_duration(period)
offset = _timedelta_to_pl_duration(offset)
every = parse_as_duration_string(every)
period = parse_as_duration_string(period)
offset = parse_as_duration_string(offset)

self.df = df
self.time_column = index_column
Expand Down
15 changes: 8 additions & 7 deletions py-polars/polars/datatypes/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
if TYPE_CHECKING:
from typing import Literal

from polars.type_aliases import PolarsDataType, PythonDataType, SchemaDict
from polars.type_aliases import PolarsDataType, PythonDataType, SchemaDict, TimeUnit


PY_STR_TO_DTYPE: SchemaDict = {
Expand Down Expand Up @@ -486,16 +486,17 @@ def maybe_cast(el: Any, dtype: PolarsDataType) -> Any:
"""Try casting a value to a value that is valid for the given Polars dtype."""
# cast el if it doesn't match
from polars.utils.convert import (
_datetime_to_pl_timestamp,
_timedelta_to_pl_timedelta,
datetime_to_int,
timedelta_to_int,
)

time_unit: TimeUnit
if isinstance(el, datetime):
time_unit = getattr(dtype, "time_unit", None)
return _datetime_to_pl_timestamp(el, time_unit)
time_unit = getattr(dtype, "time_unit", "us")
return datetime_to_int(el, time_unit)
elif isinstance(el, timedelta):
time_unit = getattr(dtype, "time_unit", None)
return _timedelta_to_pl_timedelta(el, time_unit)
time_unit = getattr(dtype, "time_unit", "us")
return timedelta_to_int(el, time_unit)

py_type = dtype_to_py_type(dtype)
if not isinstance(el, py_type):
Expand Down
10 changes: 5 additions & 5 deletions py-polars/polars/expr/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from polars.datatypes import DTYPE_TEMPORAL_UNITS, Date, Int32
from polars.utils._parse_expr_input import parse_as_expression
from polars.utils._wrap import wrap_expr
from polars.utils.convert import _timedelta_to_pl_duration
from polars.utils.convert import parse_as_duration_string
from polars.utils.deprecation import (
deprecate_function,
deprecate_renamed_function,
Expand Down Expand Up @@ -183,7 +183,7 @@ def truncate(
every = deprecate_saturating(every)
offset = deprecate_saturating(offset)
if not isinstance(every, pl.Expr):
every = _timedelta_to_pl_duration(every)
every = parse_as_duration_string(every)

if use_earliest is not None:
issue_deprecation_warning(
Expand All @@ -203,7 +203,7 @@ def truncate(
return wrap_expr(
self._pyexpr.dt_truncate(
every,
_timedelta_to_pl_duration(offset),
parse_as_duration_string(offset),
)
)

Expand Down Expand Up @@ -340,8 +340,8 @@ def round(

return wrap_expr(
self._pyexpr.dt_round(
_timedelta_to_pl_duration(every),
_timedelta_to_pl_duration(offset),
parse_as_duration_string(every),
parse_as_duration_string(offset),
)
)

Expand Down
10 changes: 5 additions & 5 deletions py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
parse_as_list_of_expressions,
parse_predicates_constraints_as_expression,
)
from polars.utils.convert import _negate_duration, _timedelta_to_pl_duration
from polars.utils.convert import negate_duration_string, parse_as_duration_string
from polars.utils.deprecation import (
deprecate_function,
deprecate_nonkeyword_arguments,
Expand Down Expand Up @@ -3420,10 +3420,10 @@ def rolling(
period = deprecate_saturating(period)
offset = deprecate_saturating(offset)
if offset is None:
offset = _negate_duration(_timedelta_to_pl_duration(period))
offset = negate_duration_string(parse_as_duration_string(period))

period = _timedelta_to_pl_duration(period)
offset = _timedelta_to_pl_duration(offset)
period = parse_as_duration_string(period)
offset = parse_as_duration_string(offset)

return self._from_pyexpr(
self._pyexpr.rolling(index_column, period, offset, closed, check_sorted)
Expand Down Expand Up @@ -10086,7 +10086,7 @@ def _prepare_rolling_window_args(
min_periods = window_size
window_size = f"{window_size}i"
elif isinstance(window_size, timedelta):
window_size = _timedelta_to_pl_duration(window_size)
window_size = parse_as_duration_string(window_size)
if min_periods is None:
min_periods = 1
return window_size, min_periods
16 changes: 8 additions & 8 deletions py-polars/polars/functions/lit.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
from polars.dependencies import numpy as np
from polars.utils._wrap import wrap_expr
from polars.utils.convert import (
_date_to_pl_date,
_datetime_to_pl_timestamp,
_time_to_pl_time,
_timedelta_to_pl_timedelta,
date_to_int,
datetime_to_int,
time_to_int,
timedelta_to_int,
)

with contextlib.suppress(ImportError): # Module not available when building docs
Expand Down Expand Up @@ -91,7 +91,7 @@ def lit(
time_zone = tzinfo_str

dt_utc = value.replace(tzinfo=timezone.utc)
dt_int = _datetime_to_pl_timestamp(dt_utc, time_unit)
dt_int = datetime_to_int(dt_utc, time_unit)
expr = lit(dt_int).cast(Datetime(time_unit))
if time_zone is not None:
expr = expr.dt.replace_time_zone(
Expand All @@ -105,15 +105,15 @@ def lit(
else:
time_unit = "us"

td_int = _timedelta_to_pl_timedelta(value, time_unit)
td_int = timedelta_to_int(value, time_unit)
return lit(td_int).cast(Duration(time_unit))

elif isinstance(value, time):
time_int = _time_to_pl_time(value)
time_int = time_to_int(value)
return lit(time_int).cast(Time)

elif isinstance(value, date):
date_int = _date_to_pl_date(value)
date_int = date_to_int(value)
return lit(date_int).cast(Date)

elif isinstance(value, pl.Series):
Expand Down
4 changes: 2 additions & 2 deletions py-polars/polars/functions/range/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

from datetime import timedelta

from polars.utils.convert import _timedelta_to_pl_duration
from polars.utils.convert import parse_as_duration_string


def parse_interval_argument(interval: str | timedelta) -> str:
"""Parse the interval argument as a Polars duration string."""
if isinstance(interval, timedelta):
return _timedelta_to_pl_duration(interval)
return parse_as_duration_string(interval)

if " " in interval:
interval = interval.replace(" ", "")
Expand Down
6 changes: 3 additions & 3 deletions py-polars/polars/io/iceberg.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

import polars._reexport as pl
from polars.dependencies import pyiceberg
from polars.utils.convert import _to_python_date, _to_python_datetime
from polars.utils.convert import to_py_date, to_py_datetime

if TYPE_CHECKING:
from datetime import date, datetime
Expand All @@ -34,8 +34,8 @@
__all__ = ["scan_iceberg"]

_temporal_conversions: dict[str, Callable[..., datetime | date]] = {
"_to_python_date": _to_python_date,
"_to_python_datetime": _to_python_datetime,
"to_py_date": to_py_date,
"to_py_datetime": to_py_datetime,
}


Expand Down
16 changes: 8 additions & 8 deletions py-polars/polars/io/pyarrow_dataset/anonymous_scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,10 @@ def _scan_pyarrow_dataset_impl(
if predicate:
from polars.datatypes import Date, Datetime, Duration
from polars.utils.convert import (
_to_python_date,
_to_python_datetime,
_to_python_time,
_to_python_timedelta,
to_py_date,
to_py_datetime,
to_py_time,
to_py_timedelta,
)

_filter = eval(
Expand All @@ -86,10 +86,10 @@ def _scan_pyarrow_dataset_impl(
"Date": Date,
"Datetime": Datetime,
"Duration": Duration,
"_to_python_date": _to_python_date,
"_to_python_datetime": _to_python_datetime,
"_to_python_time": _to_python_time,
"_to_python_timedelta": _to_python_timedelta,
"to_py_date": to_py_date,
"to_py_datetime": to_py_datetime,
"to_py_time": to_py_time,
"to_py_timedelta": to_py_timedelta,
},
)

Expand Down
18 changes: 9 additions & 9 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
parse_as_list_of_expressions,
)
from polars.utils._wrap import wrap_df, wrap_expr
from polars.utils.convert import _negate_duration, _timedelta_to_pl_duration
from polars.utils.convert import negate_duration_string, parse_as_duration_string
from polars.utils.deprecation import (
deprecate_function,
deprecate_parameter_as_positional,
Expand Down Expand Up @@ -3357,11 +3357,11 @@ def rolling(
offset = deprecate_saturating(offset)
index_column = parse_as_expression(index_column)
if offset is None:
offset = _negate_duration(_timedelta_to_pl_duration(period))
offset = negate_duration_string(parse_as_duration_string(period))

pyexprs_by = parse_as_list_of_expressions(by) if by is not None else []
period = _timedelta_to_pl_duration(period)
offset = _timedelta_to_pl_duration(offset)
period = parse_as_duration_string(period)
offset = parse_as_duration_string(offset)

lgb = self._ldf.rolling(
index_column, period, offset, closed, pyexprs_by, check_sorted
Expand Down Expand Up @@ -3703,14 +3703,14 @@ def group_by_dynamic(

index_column = parse_as_expression(index_column)
if offset is None:
offset = _negate_duration(_timedelta_to_pl_duration(every))
offset = negate_duration_string(parse_as_duration_string(every))

if period is None:
period = every

period = _timedelta_to_pl_duration(period)
offset = _timedelta_to_pl_duration(offset)
every = _timedelta_to_pl_duration(every)
period = parse_as_duration_string(period)
offset = parse_as_duration_string(offset)
every = parse_as_duration_string(every)

pyexprs_by = parse_as_list_of_expressions(by) if by is not None else []
lgb = self._ldf.group_by_dynamic(
Expand Down Expand Up @@ -3887,7 +3887,7 @@ def join_asof(
if isinstance(tolerance, str):
tolerance_str = tolerance
elif isinstance(tolerance, timedelta):
tolerance_str = _timedelta_to_pl_duration(tolerance)
tolerance_str = parse_as_duration_string(tolerance)
else:
tolerance_num = tolerance

Expand Down
Loading
Loading