-
-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat[rust, python]: run expressions in pivot (#4553)
- Loading branch information
Showing
16 changed files
with
239 additions
and
64 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
use crate::prelude::*; | ||
|
||
pub trait PhysicalAggExpr { | ||
#[allow(clippy::ptr_arg)] | ||
fn evaluate<'a>(&self, df: &DataFrame, groups: &'a GroupsProxy) -> Result<Series>; | ||
|
||
fn root_name(&self) -> Result<&str>; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
//! Polars lazy does not implement a pivot because it is impossible to know the schema without | ||
//! materializing the whole dataset. This makes a pivot quite a terrible operation for performant | ||
//! workflows. An optimization can never be pushed down past a pivot. | ||
//! | ||
//! We can do a pivot on an eager `DataFrame` as that is already materialized. The code for the | ||
//! pivot is here, because we want to be able to pass expressions to the pivot operation. | ||
//! | ||
|
||
use polars_core::frame::groupby::PivotAgg; | ||
use polars_core::{frame::groupby::expr::PhysicalAggExpr, prelude::*}; | ||
|
||
use crate::physical_plan::exotic::{prepare_eval_expr, prepare_expression_for_context}; | ||
use crate::physical_plan::state::ExecutionState; | ||
use crate::prelude::*; | ||
|
||
impl PhysicalAggExpr for Expr { | ||
fn evaluate<'a>(&self, df: &DataFrame, groups: &'a GroupsProxy) -> Result<Series> { | ||
let state = ExecutionState::new(); | ||
let dtype = df.get_columns()[0].dtype(); | ||
let phys_expr = prepare_expression_for_context("", self, dtype, Context::Aggregation)?; | ||
phys_expr | ||
.evaluate_on_groups(df, groups, &state) | ||
.map(|mut ac| ac.aggregated()) | ||
} | ||
|
||
fn root_name(&self) -> Result<&str> { | ||
Ok("") | ||
} | ||
} | ||
|
||
pub fn pivot<I0, S0, I1, S1, I2, S2>( | ||
df: &DataFrame, | ||
values: I0, | ||
index: I1, | ||
columns: I2, | ||
agg_expr: Expr, | ||
sort_columns: bool, | ||
) -> Result<DataFrame> | ||
where | ||
I0: IntoIterator<Item = S0>, | ||
S0: AsRef<str>, | ||
I1: IntoIterator<Item = S1>, | ||
S1: AsRef<str>, | ||
I2: IntoIterator<Item = S2>, | ||
S2: AsRef<str>, | ||
{ | ||
// make sure that the root column is replaced | ||
let expr = prepare_eval_expr(agg_expr); | ||
df.pivot( | ||
values, | ||
index, | ||
columns, | ||
PivotAgg::Expr(Arc::new(expr)), | ||
sort_columns, | ||
) | ||
} | ||
|
||
pub fn pivot_stable<I0, S0, I1, S1, I2, S2>( | ||
df: &DataFrame, | ||
values: I0, | ||
index: I1, | ||
columns: I2, | ||
agg_expr: Expr, | ||
sort_columns: bool, | ||
) -> Result<DataFrame> | ||
where | ||
I0: IntoIterator<Item = S0>, | ||
S0: AsRef<str>, | ||
I1: IntoIterator<Item = S1>, | ||
S1: AsRef<str>, | ||
I2: IntoIterator<Item = S2>, | ||
S2: AsRef<str>, | ||
{ | ||
// make sure that the root column is replaced | ||
let expr = prepare_eval_expr(agg_expr); | ||
df.pivot_stable( | ||
values, | ||
index, | ||
columns, | ||
PivotAgg::Expr(Arc::new(expr)), | ||
sort_columns, | ||
) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
use polars_core::prelude::*; | ||
|
||
use crate::prelude::*; | ||
|
||
pub(crate) fn prepare_eval_expr(mut expr: Expr) -> Expr { | ||
expr.mutate().apply(|e| match e { | ||
Expr::Column(name) => { | ||
*name = Arc::from(""); | ||
true | ||
} | ||
Expr::Nth(_) => { | ||
*e = Expr::Column(Arc::from("")); | ||
true | ||
} | ||
_ => true, | ||
}); | ||
expr | ||
} | ||
|
||
pub(crate) fn prepare_expression_for_context( | ||
name: &str, | ||
expr: &Expr, | ||
dtype: &DataType, | ||
ctxt: Context, | ||
) -> Result<Arc<dyn PhysicalExpr>> { | ||
let mut lp_arena = Arena::with_capacity(8); | ||
let mut expr_arena = Arena::with_capacity(10); | ||
|
||
// create a dummy lazyframe and run a very simple optimization run so that | ||
// type coercion and simplify expression optimizations run. | ||
let column = Series::full_null(name, 0, dtype); | ||
let lf = DataFrame::new_no_checks(vec![column]) | ||
.lazy() | ||
.without_optimizations() | ||
.with_simplify_expr(true) | ||
.select([expr.clone()]); | ||
let optimized = lf.optimize(&mut lp_arena, &mut expr_arena).unwrap(); | ||
let lp = lp_arena.get(optimized); | ||
let aexpr = lp.get_exprs().pop().unwrap(); | ||
|
||
let planner = PhysicalPlanner::default(); | ||
planner.create_physical_expr(aexpr, ctxt, &mut expr_arena) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.