Skip to content

Commit

Permalink
use schema in 'with_columns' to amortize lookups and fix bug in empty rows case (#2949)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Mar 23, 2022
1 parent e7095a2 commit 7791655
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 6 deletions.
65 changes: 60 additions & 5 deletions polars/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1006,6 +1006,15 @@ impl DataFrame {
self.insert_at_idx_no_name_check(index, series)
}

/// Add `series` to this `DataFrame`, locating an existing column by name.
///
/// When `find_idx_by_name` reports a column with the same name, that column
/// is replaced in place; otherwise `series` is appended to the columns.
/// Any error from `replace_at_idx` is propagated.
fn add_column_by_search(&mut self, series: Series) -> Result<()> {
    match self.find_idx_by_name(series.name()) {
        // A column with this name already exists: overwrite it.
        Some(pos) => {
            self.replace_at_idx(pos, series)?;
        }
        // Unknown name: the series becomes a new trailing column.
        None => self.columns.push(series),
    }
    Ok(())
}

/// Add a new column to this `DataFrame` or replace an existing one.
pub fn with_column<S: IntoSeries>(&mut self, column: S) -> Result<&mut Self> {
let mut series = column.into_series();
Expand All @@ -1016,17 +1025,63 @@ impl DataFrame {
}

if series.len() == height || self.is_empty() {
if let Some(idx) = self.find_idx_by_name(series.name()) {
self.replace_at_idx(idx, series)?;
self.add_column_by_search(series)?;
Ok(self)
}
// special case for literals
else if height == 0 && series.len() == 1 {
let s = series.slice(0, 0);
self.add_column_by_search(s)?;
Ok(self)
} else {
Err(PolarsError::ShapeMisMatch(
format!(
"Could not add column. The Series length {} differs from the DataFrame height: {}",
series.len(),
self.height()
)
.into(),
))
}
}

/// Add `s` to this `DataFrame`, using `schema` to locate an existing column
/// with the same name instead of searching the columns.
///
/// * If `schema` has an entry for `s.name()` and the column stored at that
///   index really carries that name, the column is replaced in place.
/// * If `schema` has an entry but the frame disagrees with it (no column at
///   that index, or a column with a different name), the schema is treated as
///   incorrect and `add_column_by_search` is used instead.
/// * If `schema` has no entry for the name, `s` is appended.
///
/// Errors from `replace_at_idx` / `add_column_by_search` are propagated.
fn add_column_by_schema(&mut self, s: Series, schema: &Schema) -> Result<()> {
    if let Some((idx, _, _)) = schema.get_full(s.name()) {
        // The key returned by the schema is always the key we looked up, so it
        // cannot tell us whether the schema is stale. Check the frame itself:
        // the column at `idx` must exist and have the same name.
        let schema_matches_frame =
            self.columns.get(idx).map(|existing| existing.name()) == Some(s.name());

        if schema_matches_frame {
            self.replace_at_idx(idx, s)?;
        } else {
            // schema is incorrect, fall back to search
            self.add_column_by_search(s)?;
        }
    } else {
        self.columns.push(s);
    }
    Ok(())
}

/// Add a new column to this `DataFrame` or replace an existing one.
/// Uses an existing schema to amortize lookups.
/// If the schema is incorrect, we fall back to a linear search.
pub fn with_column_and_schema<S: IntoSeries>(
&mut self,
column: S,
schema: &Schema,
) -> Result<&mut Self> {
let mut series = column.into_series();

let height = self.height();
if series.len() == 1 && height > 1 {
series = series.expand_at_index(0, height);
}

if series.len() == height || self.is_empty() {
self.add_column_by_schema(series, schema)?;
Ok(self)
}
// special case for literals
else if height == 0 && series.len() == 1 {
let s = series.slice(0, 0);
self.columns.push(s);
self.add_column_by_schema(s, schema)?;
Ok(self)
} else {
Err(PolarsError::ShapeMisMatch(
Expand All @@ -1035,7 +1090,7 @@ impl DataFrame {
series.len(),
self.height()
)
.into(),
.into(),
))
}
}
Expand Down
3 changes: 2 additions & 1 deletion polars/polars-lazy/src/physical_plan/executors/stack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ impl Executor for StackExec {
state.clear_schema_cache();
state.clear_expr_cache();

let schema = &*self.input_schema;
for s in res {
df.with_column(s)?;
df.with_column_and_schema(s, schema)?;
}

Ok(df)
Expand Down
1 change: 1 addition & 0 deletions polars/tests/it/lazy/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ mod expressions;
mod groupby_dynamic;
mod predicate_queries;
mod projection_queries;
mod queries;

use polars::prelude::*;

Expand Down
17 changes: 17 additions & 0 deletions polars/tests/it/lazy/queries.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
use super::*;

#[test]
fn test_with_duplicate_column_empty_df() {
    // Start from a frame with a single, zero-row column named "a".
    let empty = Int32Chunked::from_slice("a", &[]);
    let frame = DataFrame::new(vec![empty.into_series()]).unwrap();

    // Add a literal aliased to the already-present name "a".
    let out = frame
        .lazy()
        .with_columns([lit(true).alias("a")])
        .collect()
        .unwrap();

    // The result must still expose exactly one column called "a".
    assert_eq!(out.get_column_names(), &["a"]);
}

0 comments on commit 7791655

Please sign in to comment.