Skip to content

Commit

Permalink
keep_name expression
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Aug 5, 2021
1 parent 9eda75e commit 5ef8531
Show file tree
Hide file tree
Showing 8 changed files with 175 additions and 2 deletions.
22 changes: 22 additions & 0 deletions polars/polars-lazy/src/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,8 @@ pub enum Expr {
},
/// Can be used in a select statement to exclude a column from selection
Except(Box<Expr>),
/// Set root name as Alias
KeepName(Box<Expr>),
}

impl Expr {
Expand Down Expand Up @@ -352,6 +354,7 @@ impl fmt::Debug for Expr {
} => write!(f, "SLICE {:?} offset: {} len: {}", input, offset, length),
Wildcard => write!(f, "*"),
Except(column) => write!(f, "EXCEPT {:?}", column),
KeepName(e) => write!(f, "KEEP NAME {:?}", e),
}
}
}
Expand Down Expand Up @@ -1146,9 +1149,28 @@ impl Expr {

#[cfg(feature = "mode")]
#[cfg_attr(docsrs, doc(cfg(feature = "mode")))]
/// Compute the mode(s) of this column, i.e. its most frequently occurring value(s).
pub fn mode(self) -> Expr {
    self.map(
        // Convert the result of `mode` back into a `Series` for `map`.
        |series| series.mode().map(|modes| modes.into_series()),
        None,
    )
}

/// Keep the original root column name as the output name of this expression.
///
/// This wraps the expression in [`Expr::KeepName`]. `keep_name` is expected to be the
/// last (outermost) call in an expression chain.
///
/// ```
/// use polars_core::prelude::*;
/// use polars_lazy::prelude::*;
///
/// fn example(df: LazyFrame) -> LazyFrame {
///     df.select(vec![
///         // even though the alias yields a different column name,
///         // `keep_name` will make sure that the original column name is used
///         col("*").alias("foo").keep_name()
///     ])
/// }
/// ```
pub fn keep_name(self) -> Expr {
    let inner = Box::new(self);
    Expr::KeepName(inner)
}
}

/// Create a Column Expression based on a column name.
Expand Down
19 changes: 19 additions & 0 deletions polars/polars-lazy/src/frame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2455,4 +2455,23 @@ mod test {

Ok(())
}

#[test]
fn test_keep_name() -> Result<()> {
    let df = df![
        "a" => [1, 2, 3],
        "b" => [1, 2, 3]
    ]?;

    // Both projections are aliased to the same name "bar"; `keep_name` is
    // expected to give each output its root column name back.
    let projections = vec![
        col("a").alias("bar").keep_name(),
        col("b").alias("bar").keep_name(),
    ];
    let out = df.lazy().select(projections).collect()?;

    assert_eq!(out.get_column_names(), &["a", "b"]);
    Ok(())
}
}
1 change: 1 addition & 0 deletions polars/polars-lazy/src/logical_plan/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ pub(crate) fn to_aexpr(expr: Expr, arena: &mut Arena<AExpr>) -> Node {
},
Expr::Wildcard => AExpr::Wildcard,
Expr::Except(input) => AExpr::Except(to_aexpr(*input, arena)),
Expr::KeepName(_) => panic!("no keep name expected at this point"),
};
arena.add(v)
}
Expand Down
1 change: 1 addition & 0 deletions polars/polars-lazy/src/logical_plan/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ impl<'a> Iterator for ExprIter<'a> {
push(input_b)
}
Except(e) => push(e),
KeepName(e) => push(e),
}
current_expr
})
Expand Down
21 changes: 21 additions & 0 deletions polars/polars-lazy/src/logical_plan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -828,6 +828,25 @@ fn replace_wildcard_with_column(expr: Expr, column_name: Arc<String>) -> Expr {
Expr::Column(_) => expr,
Expr::Literal(_) => expr,
Expr::Except(_) => expr,
Expr::KeepName(e) => {
Expr::KeepName(Box::new(replace_wildcard_with_column(*e, column_name)))
}
}
}

/// Replace a top-level `Expr::KeepName` by an `Expr::Alias` to the first root column name.
///
/// Expressions that contain no `KeepName` node are returned unchanged.
///
/// # Panics
///
/// * if a `KeepName` node is present but is not the outermost expression;
/// * if the wrapped expression has no root column to take the name from.
fn rewrite_keep_name(expr: Expr) -> Expr {
    let contains_keep_name = has_expr(&expr, |e| matches!(e, Expr::KeepName(_)));
    if !contains_keep_name {
        return expr;
    }

    match expr {
        Expr::KeepName(inner) => {
            // The first root column provides the output name.
            let root_name = expr_to_root_column_names(&inner)
                .get(0)
                .expect("expected root column to keep expression name")
                .clone();
            Expr::Alias(inner, root_name)
        }
        // A `KeepName` exists somewhere deeper in the tree, which is not supported.
        _ => panic!("keep_name should be last expression"),
    }
}

Expand Down Expand Up @@ -914,9 +933,11 @@ fn rewrite_projections(exprs: Vec<Expr>, schema: &Schema) -> Vec<Expr> {
for field in schema.fields() {
let name = field.name();
let new_expr = replace_wildcard_with_column(expr.clone(), Arc::new(name.clone()));
let new_expr = rewrite_keep_name(new_expr);
result.push(new_expr)
}
} else {
let expr = rewrite_keep_name(expr);
result.push(expr)
};
}
Expand Down
19 changes: 17 additions & 2 deletions py-polars/docs/source/reference/expression.rst
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ Manipulation/ selection
.. autosummary::
:toctree: api/

Expr.alias
Expr.slice
Expr.explode
Expr.take_every
Expand All @@ -101,6 +100,22 @@ Manipulation/ selection
Expr.tail
Expr.reinterpret

Column names
------------
Expressions that help with renaming or selecting columns by name.

A wildcard ``col("*")`` selects all columns in a DataFrame.

Examples
--------

>>> df.select(col("*"))

.. autosummary::
:toctree: api/

Expr.alias
Expr.keep_name

Apply
-----
Expand All @@ -111,7 +126,7 @@ Apply
Expr.apply

Window
--------
------
.. autosummary::
:toctree: api/

Expand Down
91 changes: 91 additions & 0 deletions py-polars/polars/lazy/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,99 @@ def alias(self, name: str) -> "Expr":
----------
name
New name.
Examples
--------
>>> df = pl.DataFrame({
>>> "a": [1, 2, 3],
>>> "b": ["a", "b", None]
>>> })
>>> df
shape: (3, 2)
╭─────┬──────╮
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ str │
╞═════╪══════╡
│ 1 ┆ "a" │
├╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2 ┆ "b" │
├╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 3 ┆ null │
╰─────┴──────╯
>>> df.select([
>>> col("a").alias("bar"),
>>> col("b").alias("foo")
>>> ])
shape: (3, 2)
╭─────┬──────╮
│ bar ┆ foo │
│ --- ┆ --- │
│ i64 ┆ str │
╞═════╪══════╡
│ 1 ┆ "a" │
├╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2 ┆ "b" │
├╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 3 ┆ null │
╰─────┴──────╯
"""
return wrap_expr(self._pyexpr.alias(name))

def keep_name(self) -> "Expr":
    """
    Keep the original root name of the expression.

    Examples
    --------
    A groupby aggregation often changes the name of a column.
    With `keep_name` we can keep the original name of the column.

    >>> df = pl.DataFrame({
    >>>     "a": [1, 2, 3],
    >>>     "b": ["a", "b", None]
    >>> })
    >>> (df.groupby("a")
    >>>    .agg(col("b").list())
    >>>    .sort(by="a")
    >>> )
    shape: (3, 2)
    ╭─────┬────────────╮
    │ a   ┆ b_agg_list │
    │ --- ┆ ---        │
    │ i64 ┆ list [str] │
    ╞═════╪════════════╡
    │ 1   ┆ [a]        │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 2   ┆ [b]        │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 3   ┆ [null]     │
    ╰─────┴────────────╯

    >>> # keep the original column name
    >>> (df.groupby("a")
    >>>    .agg(col("b").list().keep_name())
    >>>    .sort(by="a")
    >>> )
    shape: (3, 2)
    ╭─────┬────────────╮
    │ a   ┆ b          │
    │ --- ┆ ---        │
    │ i64 ┆ list [str] │
    ╞═════╪════════════╡
    │ 1   ┆ [a]        │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 2   ┆ [b]        │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 3   ┆ [null]     │
    ╰─────┴────────────╯
    """
    # Delegate to the wrapped expression object, then re-wrap the result.
    renamed = self._pyexpr.keep_name()
    return wrap_expr(renamed)

def is_not(self) -> "Expr":
"""
Negate a boolean expression.
Expand Down Expand Up @@ -350,6 +440,7 @@ def take(self, index: "Expr") -> "Expr":
-------
Values taken by index
"""
index = expr_to_lit_or_expr(index, str_to_lit=False)
return wrap_expr(self._pyexpr.take(index._pyexpr))

def shift(self, periods: int) -> "Expr":
Expand Down
3 changes: 3 additions & 0 deletions py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,9 @@ impl PyExpr {
pub fn mode(&self) -> PyExpr {
    // Clone the wrapped expression, apply `mode`, and convert the result back into a `PyExpr`.
    let expr = self.inner.clone();
    expr.mode().into()
}
pub fn keep_name(&self) -> PyExpr {
    // Clone the wrapped expression, apply `keep_name`, and convert the result back into a `PyExpr`.
    let expr = self.inner.clone();
    expr.keep_name().into()
}
}

impl From<dsl::Expr> for PyExpr {
Expand Down

0 comments on commit 5ef8531

Please sign in to comment.