Skip to content

Commit

Permalink
fix segfault in empty df join
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Nov 19, 2021
1 parent 0186310 commit 42eaf72
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 40 deletions.
76 changes: 47 additions & 29 deletions polars/polars-core/src/chunked_array/ops/take/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ where
let mut chunks = self.downcast_iter();
match indices {
TakeIdx::Array(array) => {
if array.null_count() == array.len() {
return Self::full_null(self.name(), array.len());
}
let array = match (self.has_validity(), self.chunks.len()) {
(false, 1) => {
take_no_null_primitive::<T::Native>(chunks.next().unwrap(), array)
Expand Down Expand Up @@ -168,6 +171,9 @@ impl ChunkTake for BooleanChunked {
let mut chunks = self.downcast_iter();
match indices {
TakeIdx::Array(array) => {
if array.null_count() == array.len() {
return Self::full_null(self.name(), array.len());
}
let array = match self.chunks.len() {
1 => take::take(chunks.next().unwrap(), array).unwrap().into(),
_ => {
Expand Down Expand Up @@ -251,6 +257,9 @@ impl ChunkTake for Utf8Chunked {
let mut chunks = self.downcast_iter();
match indices {
TakeIdx::Array(array) => {
if array.null_count() == array.len() {
return Self::full_null(self.name(), array.len());
}
let array = match self.chunks.len() {
1 => take_utf8_unchecked(chunks.next().unwrap(), array) as ArrayRef,
_ => {
Expand Down Expand Up @@ -333,6 +342,9 @@ impl ChunkTake for ListChunked {
let mut chunks = ca_self.downcast_iter();
match indices {
TakeIdx::Array(array) => {
if array.null_count() == array.len() {
return Self::full_null(self.name(), array.len());
}
let array = match ca_self.chunks.len() {
1 => Arc::new(take_list_unchecked(chunks.next().unwrap(), array)) as ArrayRef,
_ => {
Expand Down Expand Up @@ -426,45 +438,51 @@ impl<T: PolarsObject> ChunkTake for ObjectChunked<T> {
{
// current implementation is suboptimal, every iterator is allocated to UInt32Array
match indices {
TakeIdx::Array(array) => match self.chunks.len() {
1 => {
let values = self.downcast_chunks().get(0).unwrap().values();

let mut ca: Self = array
.into_iter()
.map(|opt_idx| {
opt_idx.map(|idx| values.get_unchecked(*idx as usize).clone())
})
.collect();
ca.rename(self.name());
ca
TakeIdx::Array(array) => {
if array.null_count() == array.len() {
return Self::full_null(self.name(), array.len());
}
_ => {
return if !array.has_validity() {
let iter = array.values().iter().map(|i| *i as usize);

let taker = self.take_rand();
let mut ca: ObjectChunked<T> =
iter.map(|idx| taker.get_unchecked(idx).cloned()).collect();
ca.rename(self.name());
ca
} else {
let iter = array
.into_iter()
.map(|opt_idx| opt_idx.map(|idx| *idx as usize));
let taker = self.take_rand();
match self.chunks.len() {
1 => {
let values = self.downcast_chunks().get(0).unwrap().values();

let mut ca: ObjectChunked<T> = iter
let mut ca: Self = array
.into_iter()
.map(|opt_idx| {
opt_idx.and_then(|idx| taker.get_unchecked(idx).cloned())
opt_idx.map(|idx| values.get_unchecked(*idx as usize).clone())
})
.collect();

ca.rename(self.name());
ca
}
_ => {
return if !array.has_validity() {
let iter = array.values().iter().map(|i| *i as usize);

let taker = self.take_rand();
let mut ca: ObjectChunked<T> =
iter.map(|idx| taker.get_unchecked(idx).cloned()).collect();
ca.rename(self.name());
ca
} else {
let iter = array
.into_iter()
.map(|opt_idx| opt_idx.map(|idx| *idx as usize));
let taker = self.take_rand();

let mut ca: ObjectChunked<T> = iter
.map(|opt_idx| {
opt_idx.and_then(|idx| taker.get_unchecked(idx).cloned())
})
.collect();

ca.rename(self.name());
ca
}
}
}
},
}
TakeIdx::Iter(iter) => {
if self.is_empty() {
return Self::full_null(self.name(), iter.size_hint().0);
Expand Down
49 changes: 38 additions & 11 deletions polars/polars-core/src/frame/hash_join/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1641,27 +1641,54 @@ mod test {

#[test]
#[cfg_attr(miri, ignore)]
fn empty_df_join() {
fn empty_df_join() -> Result<()> {
let empty: Vec<String> = vec![];
let left = DataFrame::new(vec![
let empty_df = DataFrame::new(vec![
Series::new("key", &empty),
Series::new("lval", &empty),
Series::new("eval", &empty),
])
.unwrap();

let right = DataFrame::new(vec![
let df = DataFrame::new(vec![
Series::new("key", &["foo"]),
Series::new("rval", &[4]),
Series::new("aval", &[4]),
])
.unwrap();

let res = left.inner_join(&right, "key", "key");
let out = empty_df.inner_join(&df, "key", "key").unwrap();
assert_eq!(out.height(), 0);
let out = empty_df.left_join(&df, "key", "key").unwrap();
assert_eq!(out.height(), 0);
let out = empty_df.outer_join(&df, "key", "key").unwrap();
assert_eq!(out.height(), 1);
df.left_join(&empty_df, "key", "key")?;
df.inner_join(&empty_df, "key", "key")?;
df.outer_join(&empty_df, "key", "key")?;

assert!(res.is_ok());
assert_eq!(res.unwrap().height(), 0);
right.left_join(&left, "key", "key").unwrap();
right.inner_join(&left, "key", "key").unwrap();
right.outer_join(&left, "key", "key").unwrap();
let empty: Vec<String> = vec![];
let empty_df = DataFrame::new(vec![
Series::new("key", &empty),
Series::new("eval", &empty),
])
.unwrap();

let df = df![
"key" => [1i32, 2],
"vals" => [1, 2],
]?;

// https://github.com/pola-rs/polars/issues/1824
let empty: Vec<i32> = vec![];
let empty_df = DataFrame::new(vec![
Series::new("key", &empty),
Series::new("1val", &empty),
Series::new("2val", &empty),
])?;

let out = df.left_join(&empty_df, "key", "key")?;
assert_eq!(out.shape(), (2, 4));

Ok(())
}

#[test]
Expand Down

0 comments on commit 42eaf72

Please sign in to comment.