Skip to content

Commit

Permalink
fill_nan expression and fix #1478
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 2, 2021
1 parent 898d99e commit a4464e9
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 6 deletions.
7 changes: 6 additions & 1 deletion polars/polars-core/src/series/implementations/dates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,12 @@ macro_rules! impl_dyn_series {

/// Zips `self` with `other` under `mask` by dispatching to the physical
/// (integer-backed) implementation through `try_physical_dispatch!`.
///
/// `other` is cast to an integer dtype before dispatching: `Int32` when
/// `self` is `Date32`, `Int64` otherwise.
///
/// # Errors
///
/// Propagates the error of the cast of `other`; any error from the
/// dispatched call is handled by `try_physical_dispatch!`.
#[cfg(feature = "zip_with")]
fn zip_with_same_type(&self, mask: &BooleanChunked, other: &Series) -> Result<Series> {
    // Pick the integer dtype to cast `other` to, based on this series' dtype.
    let physical_dtype = if self.dtype() == &DataType::Date32 {
        DataType::Int32
    } else {
        DataType::Int64
    };
    // Dispatch only after the cast; the uncast `other` must not be passed on.
    let other = other.cast_with_dtype(&physical_dtype)?;
    try_physical_dispatch!(self, zip_with_same_type, mask, &other)
}

fn vec_hash(&self, random_state: RandomState) -> AlignedVec<u64> {
Expand Down
7 changes: 6 additions & 1 deletion polars/polars-lazy/src/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1110,7 +1110,7 @@ impl Expr {
}
}

/// Replace the null values by a value. See [the eager implementation](polars_core::series::SeriesTrait::fill_null).
pub fn fill_null(self, fill_value: Expr) -> Self {
map_binary_lazy_field(
self,
Expand All @@ -1132,6 +1132,11 @@ impl Expr {
},
)
}

/// Replace the floating point `NaN` values by a value.
///
/// Built as a ternary expression: where `self.is_nan()` holds, `fill_value`
/// is taken; everywhere else the original expression is kept.
pub fn fill_nan(self, fill_value: Expr) -> Self {
    // `is_nan` is called on a clone because `self` is still needed for the
    // `otherwise` branch below.
    let nan_mask = self.clone().is_nan();
    when(nan_mask).then(fill_value).otherwise(self)
}
/// Count the values of the Series
/// or
/// Get counts of the group by operation.
Expand Down
20 changes: 16 additions & 4 deletions polars/polars-lazy/src/physical_plan/expressions/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,24 @@ impl CastExpr {
// We use the booleanarray as null series, because we have no null array.
// in a ternary or binary operation, we then do type coercion to matching supertype.
// here we create a null array for the types we cannot cast to from a booleanarray
if matches!(self.data_type, DataType::List(_)) {
// the booleanarray is hacked as null type
if input.bool().is_ok() && input.null_count() == input.len() {
return Ok(ListChunked::full_null(input.name(), input.len()).into_series());

if input.bool().is_ok() && input.null_count() == input.len() {
match self.data_type {
DataType::List(_) => {
return Ok(ListChunked::full_null(input.name(), input.len()).into_series())
}
#[cfg(feature = "dtype-date32")]
DataType::Date32 => {
return Ok(Date32Chunked::full_null(input.name(), input.len()).into_series())
}
#[cfg(feature = "dtype-date64")]
DataType::Date64 => {
return Ok(Date64Chunked::full_null(input.name(), input.len()).into_series())
}
_ => {}
}
}

if self.strict {
input.strict_cast(&self.data_type)
} else {
Expand Down
14 changes: 14 additions & 0 deletions polars/polars-lazy/src/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1885,3 +1885,17 @@ fn test_power_in_agg_list2() -> Result<()> {

Ok(())
}

// Checks that `fill_nan` with a null literal turns the NaN in the float column
// into a null while the frame also carries a Date32 column.
// NOTE(review): presumably the regression case for #1478 — confirm against the issue.
#[test]
#[cfg(feature = "dtype-date32")]
fn test_fill_nan() -> Result<()> {
    let dates = Series::new("date", &[1, 2, 3]).cast_with_dtype(&DataType::Date32)?;
    let floats = Series::new("float", &[Some(1.0), Some(f32::NAN), Some(3.0)]);

    let filled = DataFrame::new(vec![dates, floats])?
        .lazy()
        .fill_nan(Null {}.lit())
        .collect()?;

    let float_col = filled.column("float")?;
    assert_eq!(
        Vec::from(float_col.f32()?),
        &[Some(1.0), None, Some(3.0)]
    );

    Ok(())
}
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/expression.rst
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ Manipulation/ selection
Expr.shift
Expr.shift_and_fill
Expr.fill_null
Expr.fill_nan
Expr.forward_fill
Expr.backward_fill
Expr.reverse
Expand Down
7 changes: 7 additions & 0 deletions py-polars/polars/lazy/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,13 @@ def fill_null(self, fill_value: Union[str, int, float, "Expr"]) -> "Expr":
fill_value = expr_to_lit_or_expr(fill_value, str_to_lit=True)
return wrap_expr(self._pyexpr.fill_null(fill_value._pyexpr))

def fill_nan(self, fill_value: Union[str, int, float, "Expr"]) -> "Expr":
    """
    Fill floating point NaN values with a fill value.

    Parameters
    ----------
    fill_value
        Replacement for NaN values. It is passed through
        `expr_to_lit_or_expr` (with `str_to_lit=True`) before being handed
        to the underlying expression.
    """
    fill_value = expr_to_lit_or_expr(fill_value, str_to_lit=True)
    return wrap_expr(self._pyexpr.fill_nan(fill_value._pyexpr))

def forward_fill(self) -> "Expr":
"""
Fill missing values with the latest seen values
Expand Down
4 changes: 4 additions & 0 deletions py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,10 @@ impl PyExpr {
self.clone().inner.fill_null(expr.inner).into()
}

/// Calls `fill_nan` on the wrapped expression with `expr`'s inner
/// expression and converts the result back into a `PyExpr`.
pub fn fill_nan(&self, expr: PyExpr) -> PyExpr {
    // Only `&self` is available, so the wrapper is cloned to obtain an owned inner value.
    let base = self.clone().inner;
    let filled = base.fill_nan(expr.inner);
    filled.into()
}

/// Calls `filter` on the wrapped expression with `predicate`'s inner
/// expression and converts the result back into a `PyExpr`.
pub fn filter(&self, predicate: PyExpr) -> PyExpr {
    // Only `&self` is available, so the wrapper is cloned to obtain an owned inner value.
    let base = self.clone().inner;
    let filtered = base.filter(predicate.inner);
    filtered.into()
}
Expand Down

0 comments on commit a4464e9

Please sign in to comment.