Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: fix oob in set operations #12736

Merged
merged 1 commit into from
Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions crates/polars-core/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ pub(crate) const FMT_TABLE_ROUNDED_CORNERS: &str = "POLARS_FMT_TABLE_ROUNDED_COR
pub(crate) const FMT_TABLE_CELL_LIST_LEN: &str = "POLARS_FMT_TABLE_CELL_LIST_LEN";

// Other env vars
/// Name of the environment variable consulted by `decimal_is_active`.
///
/// Setting it to exactly `"1"` makes that check return `true`. The stacked
/// `cfg` attributes combine, so this item only exists when both the
/// `dtype-decimal` and `python` features are enabled.
#[cfg(feature = "dtype-decimal")]
#[cfg(all(feature = "dtype-decimal", feature = "python"))]
pub(crate) const DECIMAL_ACTIVE: &str = "POLARS_ACTIVATE_DECIMAL";

/// Reports whether the `DECIMAL_ACTIVE` environment variable is set to exactly `"1"`.
///
/// An unset variable, a value that is not valid Unicode, or any other value
/// all yield `false`.
#[cfg(feature = "dtype-decimal")]
#[cfg(all(feature = "dtype-decimal", feature = "python"))]
pub(crate) fn decimal_is_active() -> bool {
    // Only a successfully read value equal to "1" counts as active.
    matches!(std::env::var(DECIMAL_ACTIVE).as_deref(), Ok("1"))
}
Expand Down
103 changes: 53 additions & 50 deletions crates/polars-ops/src/chunked_array/list/sets.rs
Original file line number Diff line number Diff line change
Expand Up @@ -259,57 +259,60 @@ fn binary(
} else {
offsets_b
};
let first_a = offsets_a[0];
let second_a = offsets_a[1];
let first_b = offsets_b[0];
let second_b = offsets_b[1];
for i in 1..offsets_slice.len() {
unsafe {
let start_a = *offsets_a.get_unchecked(i - 1) as usize;
let end_a = *offsets_a.get_unchecked(i) as usize;

let start_b = *offsets_b.get_unchecked(i - 1) as usize;
let end_b = *offsets_b.get_unchecked(i) as usize;

// The branches are the same every loop.
// We rely on branch prediction here.
let offset = if broadcast_rhs {
// going via skip iterator instead of slice doesn't heap alloc nor trigger a bitcount
let a_iter = a.into_iter().skip(start_a).take(end_a - start_a);
let b_iter = b.into_iter();
set_operation(
&mut set,
&mut set2,
a_iter,
b_iter,
&mut values_out,
set_op,
true,
)
} else if broadcast_lhs {
let a_iter = a.into_iter();
let b_iter = b.into_iter().skip(start_b).take(end_b - start_b);
set_operation(
&mut set,
&mut set2,
a_iter,
b_iter,
&mut values_out,
set_op,
false,
)
} else {
// going via skip iterator instead of slice doesn't heap alloc nor trigger a bitcount
let a_iter = a.into_iter().skip(start_a).take(end_a - start_a);
let b_iter = b.into_iter().skip(start_b).take(end_b - start_b);
set_operation(
&mut set,
&mut set2,
a_iter,
b_iter,
&mut values_out,
set_op,
false,
)
};
offsets.push(offset as i64);
}
// If we go OOB we take the first element as we are then broadcasting.
let start_a = *offsets_a.get(i - 1).unwrap_or(&first_a) as usize;
let end_a = *offsets_a.get(i).unwrap_or(&second_a) as usize;

let start_b = *offsets_b.get(i - 1).unwrap_or(&first_b) as usize;
let end_b = *offsets_b.get(i).unwrap_or(&second_b) as usize;

// The branches are the same every loop.
// We rely on branch prediction here.
let offset = if broadcast_rhs {
// going via skip iterator instead of slice doesn't heap alloc nor trigger a bitcount
let a_iter = a.into_iter().skip(start_a).take(end_a - start_a);
let b_iter = b.into_iter();
set_operation(
&mut set,
&mut set2,
a_iter,
b_iter,
&mut values_out,
set_op,
true,
)
} else if broadcast_lhs {
let a_iter = a.into_iter();
let b_iter = b.into_iter().skip(start_b).take(end_b - start_b);
set_operation(
&mut set,
&mut set2,
a_iter,
b_iter,
&mut values_out,
set_op,
false,
)
} else {
// going via skip iterator instead of slice doesn't heap alloc nor trigger a bitcount
let a_iter = a.into_iter().skip(start_a).take(end_a - start_a);
let b_iter = b.into_iter().skip(start_b).take(end_b - start_b);
set_operation(
&mut set,
&mut set2,
a_iter,
b_iter,
&mut values_out,
set_op,
false,
)
};
offsets.push(offset as i64);
}
let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets.into()) };
let values: BinaryArray<i64> = values_out.into();
Expand Down