Skip to content

Commit

Permalink
slice singleton columns that are added to df with 0 rows
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jan 29, 2022
1 parent 058a8e1 commit e44b4a3
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 10 deletions.
9 changes: 7 additions & 2 deletions polars/polars-core/src/chunked_array/ops/chunkops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ fn slice(
slice_length: usize,
own_length: usize,
) -> Vec<ArrayRef> {
let mut new_chunks = Vec::with_capacity(1);
let (raw_offset, slice_len) = slice_offsets(offset, slice_length, own_length);

let mut remaining_length = slice_len;
let mut remaining_offset = raw_offset;
let mut new_chunks = Vec::with_capacity(1);

for chunk in chunks {
let chunk_len = chunk.len();
Expand All @@ -33,7 +33,12 @@ fn slice(
remaining_length
};

new_chunks.push(chunk.slice(remaining_offset, take_len).into());
debug_assert!(remaining_offset + take_len <= chunk.len());
unsafe {
// Safety:
// this function ensures the slices are in bounds
new_chunks.push(chunk.slice_unchecked(remaining_offset, take_len).into());
}
remaining_length -= take_len;
remaining_offset = 0;
if remaining_length == 0 {
Expand Down
29 changes: 28 additions & 1 deletion polars/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -993,13 +993,19 @@ impl DataFrame {
series = series.expand_at_index(0, height);
}

if series.len() == height || self.is_empty() || series.len() == 1 {
if series.len() == height || self.is_empty() {
if let Some(idx) = self.find_idx_by_name(series.name()) {
self.replace_at_idx(idx, series)?;
} else {
self.columns.push(series);
}
Ok(self)
}
// special case for literals
else if height == 0 && series.len() == 1 {
let s = series.slice(0, 0);
self.columns.push(s);
Ok(self)
} else {
Err(PolarsError::ShapeMisMatch(
format!(
Expand Down Expand Up @@ -3058,4 +3064,25 @@ mod test {
assert_eq!(df.get_column_names(), &["a", "b", "c"]);
Ok(())
}

/// Checks how `with_column` treats a length-1 series depending on the frame's shape:
/// added to a frame with columns but zero rows the result keeps zero rows,
/// added to a frame without any columns the single value is kept.
#[test]
fn test_empty_df_hstack() -> Result<()> {
    let mut base = df!(
        "a" => [1, 2, 3],
        "b" => [1, 2, 3]
    )?;

    // Frame that still has columns "a" and "b", but zero rows.
    let mut zero_rows = base.slice(0, 0);
    let stacked = zero_rows.with_column(Series::new("c", [1]))?;
    assert_eq!(stacked.shape(), (0, 3));
    // Every column, including the newly added one, must be empty.
    for column in stacked.iter() {
        assert_eq!(column.len(), 0);
    }

    // Frame without any columns: the length-1 series is added unchanged.
    base.columns = vec![];
    let stacked = base.with_column(Series::new("c", [1]))?;
    assert_eq!(stacked.shape(), (1, 1));

    Ok(())
}
}
9 changes: 5 additions & 4 deletions polars/polars-lazy/src/dot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ impl LogicalPlan {
/// # Arguments
/// `id` - (branch, id)
/// Used to make sure that the dot boxes are distinct.
/// branch is an id per join branch
/// branch is an id per join/union branch
/// id is incremented by the depth traversal of the tree.
#[cfg_attr(docsrs, doc(cfg(feature = "dot_diagram")))]
pub(crate) fn dot(
Expand All @@ -60,13 +60,14 @@ impl LogicalPlan {
prev_node: &str,
) -> std::fmt::Result {
use LogicalPlan::*;
let (branch, id) = id;
let (mut branch, id) = id;
match self {
Union { inputs, .. } => {
for input in inputs {
let current_node = format!("UNION [{:?}]", (branch, id));
self.write_dot(acc_str, prev_node, &current_node, id)?;
input.dot(acc_str, (branch, id + 1), &current_node)?
input.dot(acc_str, (branch, id + 1), &current_node)?;
branch += 1;
}
Ok(())
}
Expand Down Expand Up @@ -185,7 +186,7 @@ impl LogicalPlan {
}
s_keys.pop();
s_keys.push(']');
let current_node = format!("AGG {:?} BY {} [{:?}]", aggs, s_keys, (branch, id));
let current_node = format!("AGG {:?}\nBY\n{} [{:?}]", aggs, s_keys, (branch, id));
self.write_dot(acc_str, prev_node, &current_node, id)?;
input.dot(acc_str, (branch, id + 1), &current_node)
}
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-lazy/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -975,7 +975,7 @@ impl LazyFrame {
name: Option<&'static str>,
) -> LazyFrame
where
F: DataFrameUdf + 'static,
F: 'static + Fn(DataFrame) -> Result<DataFrame> + Send + Sync,
{
let opt_state = self.get_opt_state();
let lp = self
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@ impl PredicatePushDown {
{
self.pushdown_and_continue(lp, acc_predicates, lp_arena, expr_arena, false)
} else {
Ok(lp)
self.no_pushdown_restart_opt(lp, acc_predicates, lp_arena, expr_arena)
}
}
// Pushed down past these nodes
Expand Down
2 changes: 2 additions & 0 deletions polars/polars-lazy/src/physical_plan/executors/stack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ impl Executor for StackExec {
let mut df = self.input.execute(state)?;

let res = if self.has_windows {
// we use a different execution path here
// to ensure the window functions run sequentially and share caches
execute_projection_cached_window_fns(&df, &self.expr, state)?
} else {
POOL.install(|| {
Expand Down
20 changes: 20 additions & 0 deletions polars/polars-lazy/src/tests/predicate_queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,23 @@ fn filter_added_column_issue_2470() -> Result<()> {

Ok(())
}

/// A filter applied after a `map` whose allowed optimizations disable
/// predicate pushdown: `predicate_at_scan` must report `false` for the query,
/// and collecting it must still yield the filtered result.
#[test]
fn filter_blocked_by_map() -> Result<()> {
    // Everything is allowed by default except predicate pushdown.
    let allowed = AllowedOptimizations {
        predicate_pushdown: false,
        ..Default::default()
    };

    // Identity map followed by the filter under test.
    let mapped = fruits_cars()
        .lazy()
        .map(|frame| Ok(frame), Some(allowed), None, None);
    let predicate = col("A").gt(lit(2i32));
    let query = mapped.filter(predicate);

    assert!(!predicate_at_scan(query.clone()));

    let collected = query.collect()?;
    assert_eq!(collected.shape(), (3, 4));

    Ok(())
}
2 changes: 1 addition & 1 deletion py-polars/polars/internals/lazy_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1393,7 +1393,7 @@ def map(
no_optimizations
Turn off all optimizations past this point.
"""
if not no_optimizations:
if no_optimizations:
predicate_pushdown = False
projection_pushdown = False
return wrap_ldf(self._ldf.map(f, predicate_pushdown, projection_pushdown))
Expand Down

0 comments on commit e44b4a3

Please sign in to comment.