Skip to content

Commit

Permalink
feat[rust]: serialize str and nan expressions (#4366)
Browse files Browse the repository at this point in the history
  • Loading branch information
universalmind303 committed Aug 13, 2022
1 parent 4f896c4 commit 1444cd1
Show file tree
Hide file tree
Showing 10 changed files with 334 additions and 161 deletions.
4 changes: 2 additions & 2 deletions polars/polars-lazy/src/dsl/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,6 @@ pub enum Expr {
output_type: GetOutput,
options: FunctionOptions,
},
#[cfg_attr(feature = "serde", serde(skip))]
Function {
/// function arguments
input: Vec<Expr>,
Expand Down Expand Up @@ -342,7 +341,6 @@ pub enum Expr {
length: Box<Expr>,
},
/// Can be used in a select statement to exclude a column from selection
#[cfg_attr(feature = "serde", serde(skip))]
Exclude(Box<Expr>, Vec<Excluded>),
/// Set root name as Alias
KeepName(Box<Expr>),
Expand Down Expand Up @@ -378,6 +376,8 @@ impl Default for Expr {
}

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]

pub enum Excluded {
Name(Arc<str>),
Dtype(DataType),
Expand Down
110 changes: 88 additions & 22 deletions polars/polars-lazy/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ mod fill_null;
mod is_in;
#[cfg(feature = "is_in")]
mod list;

mod nan;
mod pow;
#[cfg(all(feature = "rolling_window", feature = "moment"))]
mod rolling;
Expand All @@ -21,6 +21,11 @@ mod temporal;
#[cfg(feature = "trigonometry")]
mod trigonometry;

pub(super) use self::nan::NanFunction;

#[cfg(feature = "strings")]
pub(super) use self::strings::StringFunction;

use super::*;
use polars_core::prelude::*;
#[cfg(feature = "serde")]
Expand All @@ -38,14 +43,7 @@ pub enum FunctionExpr {
#[cfg(feature = "arg_where")]
ArgWhere,
#[cfg(feature = "strings")]
StringContains {
pat: String,
literal: bool,
},
#[cfg(feature = "strings")]
StringStartsWith(String),
#[cfg(feature = "strings")]
StringEndsWith(String),
StringExpr(StringFunction),
#[cfg(feature = "date_offset")]
DateOffset(Duration),
#[cfg(feature = "trigonometry")]
Expand All @@ -66,6 +64,7 @@ pub enum FunctionExpr {
ShiftAndFill {
periods: i64,
},
Nan(NanFunction),
}

#[cfg(feature = "trigonometry")]
Expand Down Expand Up @@ -128,9 +127,22 @@ impl FunctionExpr {
#[cfg(feature = "arg_where")]
ArgWhere => with_dtype(IDX_DTYPE),
#[cfg(feature = "strings")]
StringContains { .. } | StringEndsWith(_) | StringStartsWith(_) => {
with_dtype(DataType::Boolean)
StringExpr(s) => {
use StringFunction::*;
match s {
Contains { .. } | EndsWith(_) | StartsWith(_) => with_dtype(DataType::Boolean),
Extract { .. } => same_type(),
ExtractAll(_) => with_dtype(DataType::List(Box::new(DataType::Utf8))),
CountMatch(_) => with_dtype(DataType::UInt32),
#[cfg(feature = "string_justify")]
Zfill { .. } | LJust { .. } | RJust { .. } => same_type(),
#[cfg(feature = "temporal")]
Strptime(options) => with_dtype(options.date_dtype.clone()),
#[cfg(feature = "concat_str")]
Concat(_) => with_dtype(DataType::Utf8),
}
}

#[cfg(feature = "date_offset")]
DateOffset(_) => same_type(),
#[cfg(feature = "trigonometry")]
Expand All @@ -143,6 +155,7 @@ impl FunctionExpr {
#[cfg(all(feature = "rolling_window", feature = "moment"))]
RollingSkew { .. } => float_dtype(),
ShiftAndFill { .. } => same_type(),
Nan(n) => n.get_field(fields),
}
}
}
Expand All @@ -167,6 +180,7 @@ macro_rules! map_as_slice {
}

// Fn(&Series)
#[macro_export(super)]
macro_rules! map_without_args {
($func:path) => {{
let f = move |s: &mut [Series]| {
Expand All @@ -178,6 +192,19 @@ macro_rules! map_without_args {
}};
}

/// Adapter for functions shaped like `FnOnce(Series)`.
///
/// Expands to a `SpecialEq::new(Arc::new(..))` wrapping a closure over
/// `&mut [Series]`. The closure moves the first series out of the slice with
/// `std::mem::take` (the slot is left holding `Series::default()`) and passes
/// it by value to `$func`, returning whatever `$func` returns.
///
/// Exported at the crate root so that sibling modules can import it as
/// `crate::map_owned_without_args`. `#[macro_export]` takes no path-like
/// argument, so the attribute is spelled without one.
#[macro_export]
macro_rules! map_owned_without_args {
    ($func:path) => {{
        let f = move |s: &mut [Series]| {
            // Take ownership of the first input instead of cloning it.
            let s = std::mem::take(&mut s[0]);
            $func(s)
        };

        SpecialEq::new(Arc::new(f))
    }};
}

// Fn(&Series, args)
macro_rules! map_with_args {
($func:path, $($args:expr),*) => {{
Expand All @@ -191,6 +218,7 @@ macro_rules! map_with_args {
}

// FnOnce(Series, args)
#[macro_export(super)]
macro_rules! map_owned_with_args {
($func:path, $($args:expr),*) => {{
let f = move |s: &mut [Series]| {
Expand Down Expand Up @@ -229,17 +257,8 @@ impl From<FunctionExpr> for SpecialEq<Arc<dyn SeriesUdf>> {
wrap!(arg_where::arg_where)
}
#[cfg(feature = "strings")]
StringContains { pat, literal } => {
map_with_args!(strings::contains, &pat, literal)
}
#[cfg(feature = "strings")]
StringEndsWith(sub) => {
map_with_args!(strings::ends_with, &sub)
}
#[cfg(feature = "strings")]
StringStartsWith(sub) => {
map_with_args!(strings::starts_with, &sub)
}
StringExpr(s) => s.into(),

#[cfg(feature = "date_offset")]
DateOffset(offset) => {
map_owned_with_args!(temporal::date_offset, offset)
Expand All @@ -255,6 +274,7 @@ impl From<FunctionExpr> for SpecialEq<Arc<dyn SeriesUdf>> {
FillNull { super_type } => {
map_as_slice!(fill_null::fill_null, &super_type)
}

#[cfg(feature = "is_in")]
ListContains => {
wrap!(list::contains)
Expand All @@ -266,6 +286,52 @@ impl From<FunctionExpr> for SpecialEq<Arc<dyn SeriesUdf>> {
ShiftAndFill { periods } => {
map_as_slice!(shift_and_fill::shift_and_fill, periods)
}
Nan(n) => n.into(),
}
}
}

/// Lowers a [`StringFunction`] into the type-erased UDF wrapper.
///
/// Every variant is bound, together with its payload, to the function of the
/// same purpose in the `strings` module through `map_with_args!`. Variants
/// gated behind an extra cargo feature keep that gate on their match arm.
#[cfg(feature = "strings")]
impl From<StringFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
    fn from(func: StringFunction) -> Self {
        match func {
            StringFunction::Contains {
                pat: pattern,
                literal,
            } => map_with_args!(strings::contains, &pattern, literal),
            StringFunction::EndsWith(suffix) => map_with_args!(strings::ends_with, &suffix),
            StringFunction::StartsWith(prefix) => map_with_args!(strings::starts_with, &prefix),
            StringFunction::Extract {
                pat: pattern,
                group_index,
            } => map_with_args!(strings::extract, &pattern, group_index),
            StringFunction::ExtractAll(pattern) => map_with_args!(strings::extract_all, &pattern),
            StringFunction::CountMatch(pattern) => map_with_args!(strings::count_match, &pattern),
            #[cfg(feature = "string_justify")]
            StringFunction::Zfill(alignment) => map_with_args!(strings::zfill, alignment),
            #[cfg(feature = "string_justify")]
            StringFunction::LJust { width, fillchar } => {
                map_with_args!(strings::ljust, width, fillchar)
            }
            #[cfg(feature = "string_justify")]
            StringFunction::RJust { width, fillchar } => {
                map_with_args!(strings::rjust, width, fillchar)
            }
            #[cfg(feature = "temporal")]
            StringFunction::Strptime(options) => map_with_args!(strings::strptime, &options),
            #[cfg(feature = "concat_str")]
            StringFunction::Concat(delimiter) => map_with_args!(strings::concat, &delimiter),
        }
    }
}
71 changes: 71 additions & 0 deletions polars/polars-lazy/src/dsl/function_expr/nan.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
use crate::{map_owned_without_args, map_without_args};

use super::*;

#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

/// NaN-related operations that can be carried inside a `FunctionExpr`.
///
/// The enum holds no closures, only unit variants, which is what lets it
/// derive `Serialize`/`Deserialize` when the `serde` feature is enabled.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum NanFunction {
    /// Dispatches to `is_nan`; the resulting field is `Boolean`.
    IsNan,
    /// Dispatches to `is_not_nan`; the resulting field is `Boolean`.
    IsNotNan,
    /// Dispatches to `drop_nans`; the resulting field keeps the input dtype.
    DropNans,
}

/// Calls `Series::is_nan` on `s` and converts the resulting mask into a `Series`.
///
/// # Errors
/// Propagates whatever error `Series::is_nan` returns.
pub(super) fn is_nan(s: &Series) -> Result<Series> {
    let mask = s.is_nan()?;
    Ok(mask.into_series())
}

/// Calls `Series::is_not_nan` on `s` and converts the resulting mask into a `Series`.
///
/// # Errors
/// Propagates whatever error `Series::is_not_nan` returns.
pub(super) fn is_not_nan(s: &Series) -> Result<Series> {
    let mask = s.is_not_nan()?;
    Ok(mask.into_series())
}

/// Filters `s` down to the entries selected by its `is_not_nan` mask.
///
/// Only `Float32` and `Float64` series are filtered; a series of any other
/// dtype is handed back untouched.
///
/// # Errors
/// Propagates errors from the typed downcast (`f32()` / `f64()`) and from
/// `filter`.
pub(super) fn drop_nans(s: Series) -> Result<Series> {
    let filtered = match s.dtype() {
        DataType::Float32 => {
            let values = s.f32()?;
            let keep = values.is_not_nan();
            values.filter(&keep)?.into_series()
        }
        DataType::Float64 => {
            let values = s.f64()?;
            let keep = values.is_not_nan();
            values.filter(&keep)?.into_series()
        }
        // Non-float input: return it as-is.
        _ => return Ok(s),
    };
    Ok(filtered)
}

impl NanFunction {
    /// Computes the output field of this function from its input fields.
    ///
    /// The output takes its name from the first input field. `IsNan` and
    /// `IsNotNan` produce `Boolean`; `DropNans` keeps the first input's dtype.
    ///
    /// # Panics
    /// Panics if `fields` is empty, because the first field is indexed directly.
    pub(crate) fn get_field(&self, fields: &[Field]) -> Result<Field> {
        let input = &fields[0];
        let dtype = match self {
            NanFunction::IsNan | NanFunction::IsNotNan => DataType::Boolean,
            NanFunction::DropNans => input.data_type().clone(),
        };
        Ok(Field::new(input.name(), dtype))
    }
}

impl From<NanFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
fn from(nan_function: NanFunction) -> Self {
match nan_function {
NanFunction::IsNan => map_without_args!(is_nan),
NanFunction::IsNotNan => map_without_args!(is_not_nan),
NanFunction::DropNans => map_owned_without_args!(drop_nans),
}
}
}

impl From<NanFunction> for FunctionExpr {
fn from(nan_function: NanFunction) -> Self {
FunctionExpr::Nan(nan_function)
}
}

0 comments on commit 1444cd1

Please sign in to comment.