Skip to content

Commit

Permalink
allow concat/append expressions (#3541)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed May 31, 2022
1 parent 77ae80a commit a36c992
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 6 deletions.
25 changes: 25 additions & 0 deletions polars/polars-lazy/src/dsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ use polars_arrow::array::default_arrays::FromData;
#[cfg(feature = "diff")]
use polars_core::series::ops::NullBehavior;
use polars_core::utils::{get_supertype, NoNull};
use polars_ops::prelude::SeriesOps;

pub fn binary_expr(l: Expr, op: Operator, r: Expr) -> Expr {
Expr::BinaryExpr {
Expand Down Expand Up @@ -421,6 +422,30 @@ impl Expr {
}
}

/// Append the result of `other` to the result of this expression.
///
/// The append is performed by adding the chunks of `other` to this [`Series`].
///
/// When `upcast` is `true`, both sides are cast to the type returned by
/// `get_supertype` before appending and the output type is declared with
/// `GetOutput::super_type()`. When it is `false`, no cast is performed and the
/// output type is declared with `GetOutput::same_type()`.
///
/// Errors raised while resolving the supertype, casting, or appending are
/// propagated from the closure.
pub fn append<E: Into<Expr>>(self, other: E, upcast: bool) -> Self {
    let output_type = if upcast {
        GetOutput::super_type()
    } else {
        GetOutput::same_type()
    };
    let rhs = other.into();

    apply_binary(
        self,
        rhs,
        move |mut left, mut right| {
            if upcast {
                // Bring both sides to one common dtype before appending.
                let supertype = get_supertype(left.dtype(), right.dtype())?;
                left = left.cast(&supertype)?;
                right = right.cast(&supertype)?;
            }
            left.append(&right)?;
            Ok(left)
        },
        output_type,
    )
}

/// Get the first `n` elements of the Expr result
pub fn head(self, length: Option<usize>) -> Self {
self.slice(lit(0), lit(length.unwrap_or(10) as u64))
Expand Down
2 changes: 2 additions & 0 deletions py-polars/docs/source/reference/expression.rst
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ Manipulation/ selection

Expr.inspect
Expr.slice
Expr.append
Expr.explode
Expr.flatten
Expr.take_every
Expand Down Expand Up @@ -204,6 +205,7 @@ Manipulation/ selection
Expr.reinterpret
Expr.drop_nulls
Expr.drop_nans
Expr.rechunk
Expr.interpolate
Expr.arg_sort
Expr.clip
Expand Down
20 changes: 20 additions & 0 deletions py-polars/polars/internals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,26 @@ def slice(self, offset: Union[int, "Expr"], length: Union[int, "Expr"]) -> "Expr
length = pli.lit(length)
return wrap_expr(self._pyexpr.slice(offset._pyexpr, length._pyexpr))

def append(self, other: "Expr", upcast: bool = True) -> "Expr":
    """
    Append another expression to this one.

    The append is done by adding the chunks of `other` to this `Series`.

    Parameters
    ----------
    other
        Expression to append.
    upcast
        Cast both `Series` to the same supertype.
    """
    # `other` is passed through `expr_to_lit_or_expr` before its `_pyexpr` is used.
    other_expr = expr_to_lit_or_expr(other)
    appended = self._pyexpr.append(other_expr._pyexpr, upcast)
    return wrap_expr(appended)

def rechunk(self) -> "Expr":
    """
    Create a single chunk of memory for this Series.

    Delegates to the `rechunk` method of the wrapped `_pyexpr` and wraps the result.
    """
    rechunked = self._pyexpr.rechunk()
    return wrap_expr(rechunked)

def drop_nulls(self) -> "Expr":
"""
Drop null values.
Expand Down
31 changes: 25 additions & 6 deletions py-polars/polars/internals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,25 @@ def concat(
...


# Typing overload: a sequence of `pli.Expr` items yields a single `pli.Expr`.
@overload
def concat(
items: Sequence["pli.Expr"],
rechunk: bool = True,
how: str = "vertical",
) -> "pli.Expr":
...


def concat(
items: Union[
Sequence["pli.DataFrame"], Sequence["pli.Series"], Sequence["pli.LazyFrame"]
Sequence["pli.DataFrame"],
Sequence["pli.Series"],
Sequence["pli.LazyFrame"],
Sequence["pli.Expr"],
],
rechunk: bool = True,
how: str = "vertical",
) -> Union["pli.DataFrame", "pli.Series", "pli.LazyFrame"]:
) -> Union["pli.DataFrame", "pli.Series", "pli.LazyFrame", "pli.Expr"]:
"""
Aggregate all the Dataframes/Series in a List of DataFrames/Series to a single DataFrame/Series.
Expand Down Expand Up @@ -105,8 +117,9 @@ def concat(
if not len(items) > 0:
raise ValueError("cannot concat empty list")

out: Union["pli.Series", "pli.DataFrame", "pli.LazyFrame"]
if isinstance(items[0], pli.DataFrame):
out: Union["pli.Series", "pli.DataFrame", "pli.LazyFrame", "pli.Expr"]
first = items[0]
if isinstance(first, pli.DataFrame):
if how == "vertical":
out = pli.wrap_df(_concat_df(items))
elif how == "diagonal":
Expand All @@ -117,10 +130,16 @@ def concat(
raise ValueError(
f"how should be one of {'vertical', 'diagonal'}, got {how}"
)
elif isinstance(items[0], pli.LazyFrame):
elif isinstance(first, pli.LazyFrame):
return pli.wrap_ldf(_concat_lf(items, rechunk))
else:
elif isinstance(first, pli.Series):
out = pli.wrap_s(_concat_series(items))
elif isinstance(first, pli.Expr):
out = first
for e in items[1:]:
out = out.append(e) # type: ignore
else:
raise ValueError(f"did not expect type: {type(first)} in 'pl.concat'.")

if rechunk:
return out.rechunk()
Expand Down
11 changes: 11 additions & 0 deletions py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,17 @@ impl PyExpr {
self.inner.clone().slice(offset.inner, length.inner).into()
}

pub fn append(&self, other: PyExpr, upcast: bool) -> PyExpr {
    // Work on a clone of the wrapped expression: the DSL `append` takes `self` by value.
    let lhs = self.inner.clone();
    let appended = lhs.append(other.inner, upcast);
    appended.into()
}

pub fn rechunk(&self) -> PyExpr {
    // Map every evaluated series to its rechunked form; the output type is
    // declared with `GetOutput::same_type()`.
    let expr = self.inner.clone();
    expr.map(|series| Ok(series.rechunk()), GetOutput::same_type())
        .into()
}

pub fn round(&self, decimals: u32) -> PyExpr {
    // Clone only the wrapped expression, as the neighbouring methods do.
    let expr = self.inner.clone();
    expr.round(decimals).into()
}
Expand Down
13 changes: 13 additions & 0 deletions py-polars/tests/test_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,3 +235,16 @@ def test_power_by_expression() -> None:
None,
46656.0,
]


def test_expression_appends() -> None:
    """Check chunk counts and values when appending and concatenating expressions."""
    df = pl.DataFrame({"a": [1, 1, 2]})

    # A plain append keeps the two inputs as separate chunks.
    appended = df.select(pl.repeat(None, 3).append(pl.col("a")))
    assert appended.n_chunks() == 2

    # An explicit rechunk merges them into one chunk.
    rechunked = df.select(pl.repeat(None, 3).append(pl.col("a")).rechunk())
    assert rechunked.n_chunks() == 1

    # `pl.concat` on expressions rechunks by default.
    out = df.select(pl.concat([pl.repeat(None, 3), pl.col("a")]))

    assert out.n_chunks() == 1
    assert out.to_series().to_list() == [None, None, None, 1, 1, 2]

0 comments on commit a36c992

Please sign in to comment.