Skip to content

Commit

Permalink
don't overwrite existing offset variable (#2637)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Feb 13, 2022
1 parent 096095c commit 9c875c9
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 18 deletions.
1 change: 1 addition & 0 deletions polars/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ clippy-default:
cargo clippy -Z unstable-options

test:
POLARS_MAX_THREADS=4 cargo t -p polars-core test_4_threads
cargo test --all-features \
-p polars-lazy \
-p polars-io \
Expand Down
32 changes: 31 additions & 1 deletion polars/polars-core/src/frame/hash_join/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1438,8 +1438,8 @@ impl DataFrame {

#[cfg(test)]
mod test {
use crate::df;
use crate::prelude::*;
use crate::{df, POOL};

fn create_frames() -> (DataFrame, DataFrame) {
let s0 = Series::new("days", &[0, 1, 2]);
Expand Down Expand Up @@ -1994,4 +1994,34 @@ mod test {
assert_eq!(out.shape(), (9, 1));
Ok(())
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_4_threads_bit_offset() -> Result<()> {
// run this locally with a thread pool size of 4
// this was an obscure bug caused by not taking the offset of a bit into account.
let n = 8i64;
let mut left_a = (0..n).map(Some).collect::<Int64Chunked>();
let mut left_b = (0..n)
.map(|i| if i % 2 == 0 { None } else { Some(0) })
.collect::<Int64Chunked>();
left_a.rename("a");
left_b.rename("b");
let left_df = DataFrame::new(vec![left_a.into_series(), left_b.into_series()])?;

let i = 1;
let len = 8;
let range = i..i + len;
let mut right_a = range.clone().map(Some).collect::<Int64Chunked>();
let mut right_b = range
.map(|i| if i % 3 == 0 { None } else { Some(1) })
.collect::<Int64Chunked>();
right_a.rename("a");
right_b.rename("b");

let right_df = DataFrame::new(vec![right_a.into_series(), right_b.into_series()])?;
let out = left_df.join(&right_df, ["a", "b"], ["a", "b"], JoinType::Inner, None)?;
assert_eq!(out.shape(), (1, 2));
Ok(())
}
}
21 changes: 4 additions & 17 deletions polars/polars-core/src/vector_hasher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@ where
let mut offset = 0;
self.downcast_iter().for_each(|arr| {
if let Some(validity) = arr.validity() {
let (slice, offset, _) = validity.as_slice();
let (slice, byte_offset, _) = validity.as_slice();
(0..validity.len())
.map(|i| unsafe { get_bit_unchecked(slice, i + offset) })
.map(|i| unsafe { get_bit_unchecked(slice, i + byte_offset) })
.zip(&mut hashes[offset..])
.for_each(|(valid, h)| {
*h = [null_h, *h][valid as usize];
Expand Down Expand Up @@ -98,9 +98,9 @@ where
}),
_ => {
let validity = arr.validity().unwrap();
let (slice, offset, _) = validity.as_slice();
let (slice, byte_offset, _) = validity.as_slice();
(0..validity.len())
.map(|i| unsafe { get_bit_unchecked(slice, i) })
.map(|i| unsafe { get_bit_unchecked(slice, i + byte_offset) })
.zip(&mut hashes[offset..])
.zip(arr.values().as_slice())
.for_each(|((valid, h), l)| {
Expand All @@ -109,19 +109,6 @@ where
*h,
)
});

// arr
// .iter()
// .zip(&mut hashes[offset..])
// .for_each(|(opt_v, h)| match opt_v {
// Some(v) => {
// let l = T::Native::get_hash(v, &random_state);
// *h = boost_hash_combine(l, *h)
// }
// None => {
// *h = boost_hash_combine(null_h, *h);
// }
// })
}
}
offset += arr.len();
Expand Down

0 comments on commit 9c875c9

Please sign in to comment.