Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(rust,python): support horizontal concatenation of LazyFrames (#13139)
Co-authored-by: Ritchie Vink <ritchie46@gmail.com>
- Loading branch information
Showing
31 changed files
with
631 additions
and
53 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
use polars_core::functions::concat_df_horizontal; | ||
|
||
use super::*; | ||
|
||
pub(crate) struct HConcatExec { | ||
pub(crate) inputs: Vec<Box<dyn Executor>>, | ||
pub(crate) options: HConcatOptions, | ||
} | ||
|
||
impl Executor for HConcatExec { | ||
fn execute(&mut self, state: &mut ExecutionState) -> PolarsResult<DataFrame> { | ||
#[cfg(debug_assertions)] | ||
{ | ||
if state.verbose() { | ||
println!("run HConcatExec") | ||
} | ||
} | ||
let mut inputs = std::mem::take(&mut self.inputs); | ||
|
||
let dfs = if !self.options.parallel { | ||
if state.verbose() { | ||
println!("HCONCAT: `parallel=false` hconcat is run sequentially") | ||
} | ||
let mut dfs = Vec::with_capacity(inputs.len()); | ||
for (idx, mut input) in inputs.into_iter().enumerate() { | ||
let mut state = state.split(); | ||
state.branch_idx += idx; | ||
|
||
let df = input.execute(&mut state)?; | ||
|
||
dfs.push(df); | ||
} | ||
dfs | ||
} else { | ||
if state.verbose() { | ||
println!("HCONCAT: hconcat is run in parallel") | ||
} | ||
// We don't use par_iter directly because the LP may also start threads for every LP (for instance scan_csv) | ||
// this might then lead to a rayon SO. So we take a multitude of the threads to keep work stealing | ||
// within bounds | ||
let out = POOL.install(|| { | ||
inputs | ||
.chunks_mut(POOL.current_num_threads() * 3) | ||
.map(|chunk| { | ||
chunk | ||
.into_par_iter() | ||
.enumerate() | ||
.map(|(idx, input)| { | ||
let mut input = std::mem::take(input); | ||
let mut state = state.split(); | ||
state.branch_idx += idx; | ||
input.execute(&mut state) | ||
}) | ||
.collect::<PolarsResult<Vec<_>>>() | ||
}) | ||
.collect::<PolarsResult<Vec<_>>>() | ||
}); | ||
out?.into_iter().flatten().collect() | ||
}; | ||
|
||
concat_df_horizontal(&dfs) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.