Skip to content

Commit

Permalink
add strict conversion for strptime
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jan 10, 2022
1 parent 4c46344 commit 94e9ecc
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 35 deletions.
12 changes: 9 additions & 3 deletions py-polars/polars/internals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2436,6 +2436,7 @@ def strptime(
self,
datatype: Union[Type[Date], Type[Datetime]],
fmt: Optional[str] = None,
strict: bool = True,
) -> Expr:
"""
Parse utf8 expression as a Date/Datetime type.
Expand All @@ -2445,16 +2446,21 @@ def strptime(
datatype
Date | Datetime.
fmt
"yyyy-mm-dd".
format to use, see the following link for examples:
https://docs.rs/chrono/latest/chrono/format/strftime/index.html
example: "%y-%m-%d".
strict
raise an error if any conversion fails
"""
if not issubclass(datatype, DataType):
raise ValueError(
f"expected: {DataType} got: {datatype}"
) # pragma: no cover
if datatype == Date:
return wrap_expr(self._pyexpr.str_parse_date(fmt))
return wrap_expr(self._pyexpr.str_parse_date(fmt, strict))
elif datatype == Datetime:
return wrap_expr(self._pyexpr.str_parse_datetime(fmt))
return wrap_expr(self._pyexpr.str_parse_datetime(fmt, strict))
else:
raise ValueError(
"dtype should be of type {Date, Datetime}"
Expand Down
25 changes: 18 additions & 7 deletions py-polars/polars/internals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3415,7 +3415,12 @@ class StringNameSpace:
def __init__(self, series: "Series"):
self._s = series._s

def strptime(self, datatype: Type[DataType], fmt: Optional[str] = None) -> Series:
def strptime(
self,
datatype: Union[Type[Date], Type[Datetime]],
fmt: Optional[str] = None,
strict: bool = True,
) -> Series:
"""
Parse a Series of dtype Utf8 to a Date/Datetime Series.
Expand All @@ -3424,17 +3429,23 @@ def strptime(self, datatype: Type[DataType], fmt: Optional[str] = None) -> Serie
datatype
Date or Datetime.
fmt
formatting syntax. [Read more](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html)
format to use, see the following link for examples:
https://docs.rs/chrono/latest/chrono/format/strftime/index.html
example: "%y-%m-%d".
strict
raise an error if any conversion fails
Returns
-------
A Date/Datetime Series
"""
if datatype == Date:
return wrap_s(self._s.str_parse_date(fmt))
if datatype == Datetime:
return wrap_s(self._s.str_parse_datetime(fmt))
raise NotImplementedError # pragma: no cover
s = wrap_s(self._s)
return (
s.to_frame()
.select(pli.col(s.name).str.strptime(datatype, fmt, strict))
.to_series()
)

def lengths(self) -> Series:
"""
Expand Down
32 changes: 27 additions & 5 deletions py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -336,22 +336,44 @@ impl PyExpr {
self.clone().inner.product().into()
}

/// Build an expression that parses a utf8 column into `DataType::Date` values.
///
/// `fmt` is the optional format pattern forwarded to `as_date`. When `strict` is
/// true, the mapped function returns a `PolarsError::ComputeError` if the parsed
/// output does not have the same null count as the utf8 input.
// NOTE(review): this span is a rendered diff hunk, so the pre-change signature and
// the pre-change body line appear directly next to the lines that replace them.
pub fn str_parse_date(&self, fmt: Option<String>) -> PyExpr {
pub fn str_parse_date(&self, fmt: Option<String>, strict: bool) -> PyExpr {
// The closure owns `fmt` (and `strict`) because it is handed to `map` below.
let function = move |s: Series| {
// Fails early (via `?`) when the input series is not utf8.
let ca = s.utf8()?;
ca.as_date(fmt.as_deref()).map(|ca| ca.into_series())
let out = ca.as_date(fmt.as_deref())?;
if strict {
// A null-count mismatch between input and output is treated as a failed
// conversion; presumably unparseable values come back as null - TODO confirm.
if out.null_count() != ca.null_count() {
Err(PolarsError::ComputeError(
"strict conversion to dates failed, maybe set strict=False".into(),
))
} else {
Ok(out.into_series())
}
} else {
// Non-strict mode returns the parsed output without any null-count check.
Ok(out.into_series())
}
};
// Apply the closure through `map`, declaring `Date` as the output dtype.
self.clone()
.inner
.map(function, GetOutput::from_type(DataType::Date))
.into()
}

pub fn str_parse_datetime(&self, fmt: Option<String>) -> PyExpr {
pub fn str_parse_datetime(&self, fmt: Option<String>, strict: bool) -> PyExpr {
let function = move |s: Series| {
let ca = s.utf8()?;
ca.as_datetime(fmt.as_deref(), TimeUnit::Milliseconds)
.map(|ca| ca.into_series())
let out = ca.as_datetime(fmt.as_deref(), TimeUnit::Milliseconds)?;

if strict {
if out.null_count() != ca.null_count() {
Err(PolarsError::ComputeError(
"strict conversion to dates failed, maybe set strict=False".into(),
))
} else {
Ok(out.into_series())
}
} else {
Ok(out.into_series())
}
};
self.clone()
.inner
Expand Down
20 changes: 0 additions & 20 deletions py-polars/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1038,26 +1038,6 @@ impl PySeries {
Ok(s.into())
}

/// Parse this series into a date series using the optional `fmt` pattern.
///
/// # Errors
///
/// Returns an error with the message "cannot parse Date expected utf8 type" when
/// the series cannot be viewed as utf8, and forwards any error raised by `as_date`.
pub fn str_parse_date(&self, fmt: Option<&str>) -> PyResult<Self> {
    // Guard clause: the original utf8 error is discarded in favour of a fixed message.
    let utf8 = self
        .series
        .utf8()
        .map_err(|_| PyPolarsEr::Other("cannot parse Date expected utf8 type".into()))?;
    let parsed = utf8.as_date(fmt).map_err(PyPolarsEr::from)?;
    Ok(PySeries::new(parsed.into_series()))
}

/// Parse this series into a datetime series using the optional `fmt` pattern,
/// requesting `TimeUnit::Milliseconds` from `as_datetime`.
///
/// # Errors
///
/// Returns an error with the message "cannot parse datetime expected utf8 type"
/// when the series cannot be viewed as utf8, and forwards any error raised by
/// `as_datetime`.
pub fn str_parse_datetime(&self, fmt: Option<&str>) -> PyResult<Self> {
    // Guard clause: the original utf8 error is discarded in favour of a fixed message.
    let utf8 = self
        .series
        .utf8()
        .map_err(|_| PyPolarsEr::Other("cannot parse datetime expected utf8 type".into()))?;
    let parsed = utf8
        .as_datetime(fmt, TimeUnit::Milliseconds)
        .map_err(PyPolarsEr::from)?;
    Ok(parsed.into_series().into())
}

pub fn str_slice(&self, start: i64, length: Option<u64>) -> PyResult<Self> {
let ca = self.series.utf8().map_err(PyPolarsEr::from)?;
let s = ca
Expand Down

0 comments on commit 94e9ecc

Please sign in to comment.