Skip to content

Commit

Permalink
exclude expression
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Aug 5, 2021
1 parent 5ef8531 commit 371373e
Show file tree
Hide file tree
Showing 13 changed files with 152 additions and 129 deletions.
56 changes: 31 additions & 25 deletions polars/polars-lazy/src/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use std::{
};
// reexport the lazy method
pub use crate::frame::IntoLazy;
use polars_core::frame::select::Selection;
use polars_core::utils::get_supertype;

/// A wrapper trait for any closure `Fn(Vec<Series>) -> Result<Series>`
Expand Down Expand Up @@ -260,7 +261,7 @@ pub enum Expr {
output_field: NoEq<Arc<dyn BinaryUdfOutputField>>,
},
/// Can be used in a select statement to exclude a column from selection
Except(Box<Expr>),
Exclude(Box<Expr>, Vec<Arc<String>>),
/// Set root name as Alias
KeepName(Box<Expr>),
}
Expand Down Expand Up @@ -353,35 +354,12 @@ impl fmt::Debug for Expr {
length,
} => write!(f, "SLICE {:?} offset: {} len: {}", input, offset, length),
Wildcard => write!(f, "*"),
Except(column) => write!(f, "EXCEPT {:?}", column),
Exclude(column, names) => write!(f, "{:?}, EXCEPT {:?}", column, names),
KeepName(e) => write!(f, "KEEP NAME {:?}", e),
}
}
}

/// Build an expression that marks a single named column as excepted from a selection.
///
/// The returned value is an `Expr::Except` wrapping `col(name)`.
///
/// # Panics
///
/// Panics when `name` is the wildcard `"*"`; a wildcard cannot be used as a column exception.
///
/// # Example
///
/// ```rust
/// use polars_core::prelude::*;
/// use polars_lazy::prelude::*;
///
/// // Select all columns except foo.
/// fn example(df: DataFrame) -> LazyFrame {
///     df.lazy()
///         .select(&[
///             col("*"), except("foo")
///         ])
/// }
/// ```
pub fn except(name: &str) -> Expr {
    // Guard clause: the wildcard is the only name that is rejected.
    if name == "*" {
        panic!("cannot use a wildcard as a column exception");
    }
    Expr::Except(Box::new(col(name)))
}

#[derive(Debug, Copy, Clone, PartialEq)]
pub enum Operator {
Eq,
Expand Down Expand Up @@ -1171,6 +1149,34 @@ impl Expr {
pub fn keep_name(self) -> Expr {
    // Box the current expression and tag it with the `KeepName` variant
    // ("set root name as alias" per the variant's own documentation).
    let inner = Box::new(self);
    Expr::KeepName(inner)
}

/// Exclude one or more named columns from a wildcard selection.
///
/// The names produced by `columns` are copied into owned, reference-counted
/// strings and stored next to the wrapped expression in an `Expr::Exclude`.
///
/// # Example
///
/// ```rust
/// use polars_core::prelude::*;
/// use polars_lazy::prelude::*;
///
/// // Select all columns except foo.
/// fn example(df: DataFrame) -> LazyFrame {
///     df.lazy()
///         .select(&[
///             col("*").exclude(&["foo"])
///         ])
/// }
/// ```
pub fn exclude<'a, S, J>(self, columns: S) -> Expr
where
    S: Selection<'a, J>,
{
    // Materialize the selection first, then turn every name into an `Arc<String>`.
    let names = columns.to_selection_vec();
    let excluded: Vec<Arc<String>> = names
        .iter()
        .map(|name| Arc::new(name.to_string()))
        .collect();
    Expr::Exclude(Box::new(self), excluded)
}
}

/// Create a Column Expression based on a column name.
Expand Down
32 changes: 14 additions & 18 deletions polars/polars-lazy/src/frame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1920,24 +1920,6 @@ mod test {
)
}

#[test]
fn test_select_except() {
    // Three-column frame; "foo" is the column that gets excepted.
    let frame = df! {
        "foo" => &[1, 1, 2, 2, 3],
        "bar" => &[1.0, 1.0, 2.0, 2.0, 3.0],
        "ham" => &[1.0, 1.0, 2.0, 2.0, 3.0]
    }
    .unwrap();

    // Wildcard selection combined with a separate `except` expression.
    let projection = [col("*"), except("foo")];
    let selected = frame.lazy().select(&projection).collect().unwrap();

    // Only the two non-excepted columns remain, in this exact order.
    assert_eq!(selected.get_column_names(), &["ham", "bar"]);
}

#[test]
fn test_lazy_groupby_apply() {
let df = df! {
Expand Down Expand Up @@ -2474,4 +2456,18 @@ mod test {
assert_eq!(out.get_column_names(), &["a", "b"]);
Ok(())
}

#[test]
fn test_exclude() -> Result<()> {
    // Three integer columns; "b" is the one excluded from the wildcard.
    let frame = df![
        "a" => [1, 2, 3],
        "b" => [1, 2, 3],
        "c" => [1, 2, 3]
    ]?;

    let projection = vec![col("*").exclude(&["b"])];
    let selected = frame.lazy().select(projection).collect()?;

    // The remaining columns are expected in their original order.
    assert_eq!(selected.get_column_names(), &["a", "c"]);
    Ok(())
}
}
2 changes: 0 additions & 2 deletions polars/polars-lazy/src/logical_plan/aexpr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ pub enum AExpr {
/// Delays output type evaluation until input schema is known.
output_field: NoEq<Arc<dyn BinaryUdfOutputField>>,
},
Except(Node),
}

impl Default for AExpr {
Expand Down Expand Up @@ -313,7 +312,6 @@ impl AExpr {
Shift { input, .. } => arena.get(*input).to_field(schema, ctxt, arena),
Slice { input, .. } => arena.get(*input).to_field(schema, ctxt, arena),
Wildcard => panic!("should be no wildcard at this point"),
Except(_) => panic!("should be no except at this point"),
}
}

Expand Down
5 changes: 2 additions & 3 deletions polars/polars-lazy/src/logical_plan/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,8 @@ pub(crate) fn to_aexpr(expr: Expr, arena: &mut Arena<AExpr>) -> Node {
length,
},
Expr::Wildcard => AExpr::Wildcard,
Expr::Except(input) => AExpr::Except(to_aexpr(*input, arena)),
Expr::KeepName(_) => panic!("no keep name expected at this point"),
Expr::KeepName(_) => panic!("no keep_name expected at this point"),
Expr::Exclude(_, _) => panic!("no exclude expected at this point"),
};
arena.add(v)
}
Expand Down Expand Up @@ -573,7 +573,6 @@ pub(crate) fn node_to_exp(node: Node, expr_arena: &Arena<AExpr>) -> Expr {
length,
},
AExpr::Wildcard => Expr::Wildcard,
AExpr::Except(node) => Expr::Except(Box::new(node_to_exp(node, expr_arena))),
}
}

Expand Down
3 changes: 1 addition & 2 deletions polars/polars-lazy/src/logical_plan/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ impl<'a> Iterator for ExprIter<'a> {
push(input_a);
push(input_b)
}
Except(e) => push(e),
Exclude(e, _) => push(e),
KeepName(e) => push(e),
}
current_expr
Expand Down Expand Up @@ -193,7 +193,6 @@ impl AExpr {
push(input_a);
push(input_b)
}
Except(input) => push(input),
}
}
}
Expand Down
49 changes: 18 additions & 31 deletions polars/polars-lazy/src/logical_plan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ use polars_io::{parquet::ParquetReader, SerReader};
use crate::logical_plan::iterator::ArenaExprIter;
use crate::logical_plan::LogicalPlan::DataFrameScan;
use crate::utils::{
combine_predicates_expr, expr_to_root_column_name, expr_to_root_column_names, has_expr,
has_wildcard, rename_expr_root_name,
combine_predicates_expr, expr_to_root_column_names, has_expr, has_wildcard,
rename_expr_root_name,
};
use crate::{prelude::*, utils};
use polars_io::csv::NullValues;
Expand Down Expand Up @@ -827,7 +827,8 @@ fn replace_wildcard_with_column(expr: Expr, column_name: Arc<String>) -> Expr {
},
Expr::Column(_) => expr,
Expr::Literal(_) => expr,
Expr::Except(_) => expr,
// take the inner expression, thereby dropping the `Exclude` wrapper
Expr::Exclude(e, _) => replace_wildcard_with_column(*e, column_name),
Expr::KeepName(e) => {
Expr::KeepName(Box::new(replace_wildcard_with_column(*e, column_name)))
}
Expand All @@ -854,20 +855,16 @@ fn rewrite_keep_name(expr: Expr) -> Expr {
/// In other cases replace the wildcard with an expression with all columns
fn rewrite_projections(exprs: Vec<Expr>, schema: &Schema) -> Vec<Expr> {
let mut result = Vec::with_capacity(exprs.len() + schema.fields().len());
let mut exclude = vec![];
for expr in exprs {
// Columns that are excepted are later removed from the projection.
// This can be ergonomic in combination with a wildcard expression.
if let Expr::Except(column) = &expr {
if let Expr::Column(name) = &**column {
exclude.push(name.clone());
continue;
} else {
panic!("Except expression should have column name")
}
}

if has_wildcard(&expr) {
// keep track of the columns excluded from the wildcard
let mut exclude = vec![];
(&expr).into_iter().for_each(|e| {
if let Expr::Exclude(_, names) = e {
exclude.extend_from_slice(names)
}
});

// if count wildcard. count one column
if has_expr(&expr, |e| matches!(e, Expr::Agg(AggExpr::Count(_)))) {
let new_name = Arc::new(schema.field(0).unwrap().name().clone());
Expand Down Expand Up @@ -932,28 +929,18 @@ fn rewrite_projections(exprs: Vec<Expr>, schema: &Schema) -> Vec<Expr> {

for field in schema.fields() {
let name = field.name();
let new_expr = replace_wildcard_with_column(expr.clone(), Arc::new(name.clone()));
let new_expr = rewrite_keep_name(new_expr);
result.push(new_expr)
if !exclude.iter().any(|exluded| &**exluded == name) {
let new_expr =
replace_wildcard_with_column(expr.clone(), Arc::new(name.clone()));
let new_expr = rewrite_keep_name(new_expr);
result.push(new_expr)
}
}
} else {
let expr = rewrite_keep_name(expr);
result.push(expr)
};
}
if !exclude.is_empty() {
for name in exclude {
let idx = result
.iter()
.position(|expr| match expr_to_root_column_name(expr) {
Ok(column_name) => column_name == name,
Err(_) => false,
});
if let Some(idx) = idx {
result.swap_remove(idx);
}
}
}
result
}

Expand Down
1 change: 0 additions & 1 deletion polars/polars-lazy/src/physical_plan/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -923,7 +923,6 @@ impl DefaultPlanner {
}))
}
Wildcard => panic!("should be no wildcard at this point"),
Except(_) => panic!("should be no except at this point"),
}
}
}
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/expression.rst
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ Column names

Expr.alias
Expr.keep_name
Expr.exclude

Apply
-----
Expand Down
1 change: 0 additions & 1 deletion py-polars/docs/source/reference/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ These functions can be used as expression and sometimes also in eager contexts.
:toctree: api/

col
except_
count
to_list
std
Expand Down
82 changes: 82 additions & 0 deletions py-polars/polars/lazy/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,54 @@ def alias(self, name: str) -> "Expr":
"""
return wrap_expr(self._pyexpr.alias(name))

def exclude(self, columns: Union[str, tp.List[str]]) -> "Expr":
    """
    Exclude certain columns from a wildcard expression.

    Parameters
    ----------
    columns
        Column(s) to exclude from selection. A single name given as a
        string is wrapped in a one-element list before being passed on.

    Examples
    --------
    >>> df = pl.DataFrame({
    ...     "a": [1, 2, 3],
    ...     "b": ["a", "b", None],
    ...     "c": [None, 2, 1]
    ... })
    >>> df
    shape: (3, 3)
    ╭─────┬──────┬──────╮
    │ a   ┆ b    ┆ c    │
    │ --- ┆ ---  ┆ ---  │
    │ i64 ┆ str  ┆ i64  │
    ╞═════╪══════╪══════╡
    │ 1   ┆ "a"  ┆ null │
    ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ 2   ┆ "b"  ┆ 2    │
    ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ 3   ┆ null ┆ 1    │
    ╰─────┴──────┴──────╯
    >>> df.select(col("*").exclude("b"))
    shape: (3, 2)
    ╭─────┬──────╮
    │ a   ┆ c    │
    │ --- ┆ ---  │
    │ i64 ┆ i64  │
    ╞═════╪══════╡
    │ 1   ┆ null │
    ├╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ 2   ┆ 2    │
    ├╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ 3   ┆ 1    │
    ╰─────┴──────╯
    """
    # Normalize a lone column name to a list; any other input is forwarded unchanged.
    names = [columns] if isinstance(columns, str) else columns
    return wrap_expr(self._pyexpr.exclude(names))

def keep_name(self) -> "Expr":
"""
Keep the original root name of the expression.
Expand Down Expand Up @@ -234,6 +282,40 @@ def keep_name(self) -> "Expr":
def is_not(self) -> "Expr":
    """
    Negate a boolean expression.

    Examples
    --------
    >>> df = pl.DataFrame({
    ...     "a": [True, False, False],
    ...     "b": ["a", "b", None],
    ... })
    >>> df
    shape: (3, 2)
    ╭───────┬──────╮
    │ a     ┆ b    │
    │ ---   ┆ ---  │
    │ bool  ┆ str  │
    ╞═══════╪══════╡
    │ true  ┆ "a"  │
    ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ false ┆ "b"  │
    ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    │ false ┆ null │
    ╰───────┴──────╯
    >>> df.select(col("a").is_not())
    shape: (3, 1)
    ╭───────╮
    │ a     │
    │ ---   │
    │ bool  │
    ╞═══════╡
    │ false │
    ├╌╌╌╌╌╌╌┤
    │ true  │
    ├╌╌╌╌╌╌╌┤
    │ true  │
    ╰───────╯
    """
    # Delegate to the underlying expression object and re-wrap the result.
    negated = self._pyexpr.is_not()
    return wrap_expr(negated)

Expand Down

0 comments on commit 371373e

Please sign in to comment.