Skip to content

Commit

Permalink
feat[rust, python]: various str.strptime enhancements/fixes, and new…
Browse files Browse the repository at this point in the history
… fractional seconds option for dt.seconds, #4829

* add 'fractional' param for "dt.second" method
* address truncated precision in strptime-derived cols
  • Loading branch information
alexander-beedie committed Sep 13, 2022
1 parent e0167a4 commit 051c087
Show file tree
Hide file tree
Showing 11 changed files with 191 additions and 72 deletions.
10 changes: 7 additions & 3 deletions polars/polars-time/src/chunkedarray/utf8/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ where
"%y/%m/%d %H:%M:%S",
//210319 23:58:50
"%y%m%d %H:%M:%S",
// 2019-04-18T02:45:55
// 2021/12/31 12:54:98
"%Y/%m/%d %H:%M:%S",
// 2021-12-31 24:58:01
Expand All @@ -45,13 +44,18 @@ where
"%Y/%m/%d %H:%M:%S",
// 20210319 23:58:50
"%Y%m%d %H:%M:%S",
// note: '%F' cannot be parsed by polars native parser
// 2019-04-18T02:45:55
// %F cannot be parse by polars native parser
"%Y-%m-%dT%H:%M:%S",
// 2019-04-18T02:45:55.555000000
// 2019-04-18T02:45:55[...]
// milliseconds
"%Y-%m-%d %H:%M:%S.%3f",
"%Y-%m-%dT%H:%M:%S.%3f",
// microseconds
"%Y-%m-%d %H:%M:%S.%6f",
"%Y-%m-%dT%H:%M:%S.%6f",
// nanoseconds
"%Y-%m-%d %H:%M:%S.%9f",
"%Y-%m-%dT%H:%M:%S.%9f",
]
.into_iter()
Expand Down
10 changes: 7 additions & 3 deletions polars/polars-time/src/chunkedarray/utf8/patterns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,15 @@ pub(super) static DATETIME_Y_M_D: &[&str] = &[
// 2019-04-18T02:45:55
"%Y-%m-%dT%H:%M:%S",
"%Y-%m-%dT%H:%M:%SZ",
// 2019-04-18T02:45:55.555000000
// microseconds
"%Y-%m-%dT%H:%M:%S.%6f",
// 2019-04-18T02:45:55.555[000000]
// nanoseconds
"%Y-%m-%d %H:%M:%S.%9f",
"%Y-%m-%dT%H:%M:%S.%9f",
// microseconds
"%Y-%m-%d %H:%M:%S.%6f",
"%Y-%m-%dT%H:%M:%S.%6f",
// milliseconds
"%Y-%m-%d %H:%M:%S.%3f",
"%Y-%m-%dT%H:%M:%S.%3f",
// no times
"%Y-%m-%d",
Expand Down
8 changes: 8 additions & 0 deletions py-polars/polars/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,9 @@ class Date(DataType):
class Datetime(DataType):
"""Calendar date and time type."""

tu: TimeUnit | None = None
tz: str | None = None

def __init__(self, time_unit: TimeUnit = "us", time_zone: str | None = None):
"""
Calendar date and time type.
Expand Down Expand Up @@ -243,6 +246,8 @@ def __hash__(self) -> int:
class Duration(DataType):
"""Time duration/delta type."""

tu: TimeUnit | None = None

def __init__(self, time_unit: TimeUnit = "us"):
"""
Time duration/delta type.
Expand Down Expand Up @@ -338,6 +343,8 @@ def __hash__(self) -> int:
return hash(Struct)


TemporalDataType = Union[Type[Datetime], Datetime, Type[Date], Date, Type[Time], Time]

DTYPE_TEMPORAL_UNITS: frozenset[TimeUnit] = frozenset(["ns", "us", "ms"])


Expand Down Expand Up @@ -597,6 +604,7 @@ def maybe_cast(
return _datetime_to_pl_timestamp(el, time_unit)
elif isinstance(el, timedelta):
return _timedelta_to_pl_timedelta(el, time_unit)

py_type = dtype_to_py_type(dtype)
if not isinstance(el, py_type):
el = py_type(el)
Expand Down
91 changes: 58 additions & 33 deletions py-polars/polars/internals/expr/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def year(self) -> pli.Expr:
"""
Extract year from underlying Date representation.
Can be performed on Date and Datetime.
Can be performed on Date and Datetime columns.
Returns the year number in the calendar date.
Expand Down Expand Up @@ -197,7 +197,7 @@ def quarter(self) -> pli.Expr:
"""
Extract quarter from underlying Date representation.
Can be performed on Date and Datetime.
Can be performed on Date and Datetime columns.
Returns the quarter ranging from 1 to 4.
Expand Down Expand Up @@ -245,7 +245,7 @@ def month(self) -> pli.Expr:
"""
Extract month from underlying Date representation.
Can be performed on Date and Datetime.
Can be performed on Date and Datetime columns.
Returns the month number starting from 1.
The return value ranges from 1 to 12.
Expand Down Expand Up @@ -294,7 +294,7 @@ def week(self) -> pli.Expr:
"""
Extract the week from the underlying Date representation.
Can be performed on Date and Datetime
Can be performed on Date and Datetime columns.
Returns the ISO week number starting from 1.
The return value ranges from 1 to 53. (The last week of the year varies from year to year.)
Expand Down Expand Up @@ -343,7 +343,7 @@ def weekday(self) -> pli.Expr:
"""
Extract the week day from the underlying Date representation.
Can be performed on Date and Datetime.
Can be performed on Date and Datetime columns.
Returns the weekday number, where Monday = 0 and Sunday = 6.
Expand Down Expand Up @@ -397,7 +397,7 @@ def day(self) -> pli.Expr:
"""
Extract day from underlying Date representation.
Can be performed on Date and Datetime.
Can be performed on Date and Datetime columns.
Returns the day of month starting from 1.
The return value ranges from 1 to 31. (The last day of the month varies from month to month.)
Expand Down Expand Up @@ -452,7 +452,7 @@ def ordinal_day(self) -> pli.Expr:
"""
Extract ordinal day from underlying Date representation.
Can be performed on Date and Datetime.
Can be performed on Date and Datetime columns.
Returns the day of year starting from 1.
The return value ranges from 1 to 366. (The last day of the year varies from year to year.)
Expand Down Expand Up @@ -507,7 +507,7 @@ def hour(self) -> pli.Expr:
"""
Extract hour from underlying DateTime representation.
Can be performed on Datetime.
Can be performed on Datetime columns.
Returns the hour number from 0 to 23.
Expand Down Expand Up @@ -555,7 +555,7 @@ def minute(self) -> pli.Expr:
"""
Extract minutes from underlying DateTime representation.
Can be performed on Datetime.
Can be performed on Datetime columns.
Returns the minute number from 0 to 59.
Expand Down Expand Up @@ -601,61 +601,86 @@ def minute(self) -> pli.Expr:
"""
return pli.wrap_expr(self._pyexpr.minute())

def second(self) -> pli.Expr:
def second(self, fractional: bool = False) -> pli.Expr:
"""
Extract seconds from underlying DateTime representation.
Can be performed on Datetime.
Can be performed on Datetime columns.
Returns the second number from 0 to 59.
Returns the integer second number from 0 to 59, or, if ``fractional=True``,
a floating point number in the range [0, 60) that includes any
milli/micro/nanosecond component.
Returns
-------
Second as UInt32
Second as UInt32 (or Float64)
Examples
--------
>>> from datetime import timedelta, datetime
>>> start = datetime(2001, 1, 1)
>>> stop = datetime(2001, 1, 1, 0, 0, 4)
>>> df = pl.DataFrame(
... {"date": pl.date_range(start, stop, timedelta(seconds=2))}
... data={
... "date": pl.date_range(
... low=datetime(2001, 1, 1, 0, 0, 0, 456789),
... high=datetime(2001, 1, 1, 0, 0, 6),
... interval=timedelta(seconds=2, microseconds=654321),
... )
... }
... )
>>> df
shape: (3, 1)
┌─────────────────────┐
│ date │
│ --- │
│ datetime[μs] │
╞═════════════════════╡
│ 2001-01-01 00:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2001-01-01 00:00:02
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2001-01-01 00:00:04
└─────────────────────┘
>>> df.select(pl.col("date").dt.second())
┌────────────────────────────
│ date
│ ---
│ datetime[μs]
╞════════════════════════════
│ 2001-01-01 00:00:00.456789
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌
│ 2001-01-01 00:00:03.111110
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌
│ 2001-01-01 00:00:05.765431
└────────────────────────────
>>> df.select(pl.col("date").dt.second().alias("secs"))
shape: (3, 1)
┌──────┐
date
secs
│ --- │
│ u32 │
╞══════╡
│ 0 │
├╌╌╌╌╌╌┤
2
3
├╌╌╌╌╌╌┤
4
5
└──────┘
>>> df.select(pl.col("date").dt.second(fractional=True).alias("secs"))
shape: (3, 1)
┌──────────┐
│ secs │
│ --- │
│ f64 │
╞══════════╡
│ 0.456789 │
├╌╌╌╌╌╌╌╌╌╌┤
│ 3.11111 │
├╌╌╌╌╌╌╌╌╌╌┤
│ 5.765431 │
└──────────┘
"""
return pli.wrap_expr(self._pyexpr.second())
sec = pli.wrap_expr(self._pyexpr.second())
return (
sec + (pli.wrap_expr(self._pyexpr.nanosecond()) / pli.lit(1_000_000_000.0))
if fractional
else sec
)

def nanosecond(self) -> pli.Expr:
"""
Extract nanoseconds from underlying DateTime representation.
Can be performed on Datetime.
Can be performed on Datetime columns.
Returns the number of nanoseconds since the whole non-leap second.
The range from 1,000,000,000 to 1,999,999,999 represents the leap second.
Expand Down
23 changes: 19 additions & 4 deletions py-polars/polars/internals/expr/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,14 @@
from typing import TYPE_CHECKING

import polars.internals as pli
from polars.datatypes import DataType, Date, Datetime, Time, is_polars_dtype
from polars.datatypes import (
DataType,
Date,
Datetime,
TemporalDataType,
Time,
is_polars_dtype,
)
from polars.utils import deprecated_alias

if TYPE_CHECKING:
Expand All @@ -20,7 +27,7 @@ def __init__(self, expr: pli.Expr):

def strptime(
self,
datatype: type[Date] | type[Datetime] | type[Time],
datatype: TemporalDataType,
fmt: str | None = None,
strict: bool = True,
exact: bool = True,
Expand All @@ -31,7 +38,7 @@ def strptime(
Parameters
----------
datatype
Date | Datetime | Time.
Date | Datetime | Time
fmt
Format to use, refer to the `chrono strftime documentation
<https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
Expand All @@ -42,6 +49,12 @@ def strptime(
- If True, require an exact format match.
- If False, allow the format to match anywhere in the target string.
Notes
-----
When parsing a Datetime, the column precision will be inferred from
the format string, if given, e.g. "%F %T%.3f" => Datetime("ms"). If
no fractional second component is found then the default is "us".
Examples
--------
Dealing with different formats.
Expand Down Expand Up @@ -88,7 +101,9 @@ def strptime(
if datatype == Date:
return pli.wrap_expr(self._pyexpr.str_parse_date(fmt, strict, exact))
elif datatype == Datetime:
return pli.wrap_expr(self._pyexpr.str_parse_datetime(fmt, strict, exact))
tu = datatype.tu # type: ignore[union-attr]
dtcol = pli.wrap_expr(self._pyexpr.str_parse_datetime(fmt, strict, exact))
return dtcol if (tu is None) else dtcol.dt.cast_time_unit(tu)
elif datatype == Time:
return pli.wrap_expr(self._pyexpr.str_parse_time(fmt, strict, exact))
else: # pragma: no cover
Expand Down

0 comments on commit 051c087

Please sign in to comment.