Skip to content

Commit

Permalink
don't propagate null values in equality comparison (#2667)
Browse files (browse the repository at this point in the history)
  • Loading branch information
ritchie46 committed Feb 16, 2022
1 parent 5114d55 commit 2d2db6e
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 14 deletions.
2 changes: 1 addition & 1 deletion polars/polars-arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ description = "Arrow interfaces for Polars DataFrame library"

[dependencies]
# arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "dc516495a5a49ae41d3c68caab4e33dd57337640", default-features = false }
- arrow = { package = "arrow2", git = "https://github.com/ritchie46/arrow2", branch = "fix_extend", default-features = false }
+ arrow = { package = "arrow2", git = "https://github.com/ritchie46/arrow2", branch = "no_null_propagate_comp", default-features = false }
# arrow = { package = "arrow2", version = "0.9", default-features = false, features = ["compute_concatenate"] }
hashbrown = "0.12"
num = "^0.4"
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ package = "arrow2"
# git = "https://github.com/jorgecarleitao/arrow2"
git = "https://github.com/ritchie46/arrow2"
# rev = "dc516495a5a49ae41d3c68caab4e33dd57337640"
- branch = "fix_extend"
+ branch = "no_null_propagate_comp"
# version = "0.9"
default-features = false
features = [
Expand Down
22 changes: 12 additions & 10 deletions polars/polars-core/src/chunked_array/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ where
_ => {
// same length
let (lhs, rhs) = align_chunks_binary(self, rhs);
- lhs.comparison(&rhs, |x, y| comparison::eq(x, y))
+ lhs.comparison(&rhs, |x, y| comparison::eq_and_validity(x, y))
}
}
}
Expand All @@ -106,7 +106,7 @@ where
_ => {
// same length
let (lhs, rhs) = align_chunks_binary(self, rhs);
- lhs.comparison(&rhs, |x, y| comparison::neq(x, y))
+ lhs.comparison(&rhs, |x, y| comparison::neq_and_validity(x, y))
}
}
}
Expand Down Expand Up @@ -257,7 +257,7 @@ impl ChunkCompare<&BooleanChunked> for BooleanChunked {
_ => {
// same length
let (lhs, rhs) = align_chunks_binary(self, rhs);
- compare_bools(&lhs, &rhs, |lhs, rhs| comparison::eq(lhs, rhs))
+ compare_bools(&lhs, &rhs, |lhs, rhs| comparison::eq_and_validity(lhs, rhs))
}
}
}
Expand Down Expand Up @@ -288,7 +288,9 @@ impl ChunkCompare<&BooleanChunked> for BooleanChunked {
_ => {
// same length
let (lhs, rhs) = align_chunks_binary(self, rhs);
- compare_bools(&lhs, &rhs, |lhs, rhs| comparison::neq(lhs, rhs))
+ compare_bools(&lhs, &rhs, |lhs, rhs| {
+ comparison::neq_and_validity(lhs, rhs)
+ })
}
}
}
Expand Down Expand Up @@ -468,7 +470,7 @@ impl ChunkCompare<&Utf8Chunked> for Utf8Chunked {
}
// same length
else if self.chunk_id().zip(rhs.chunk_id()).all(|(l, r)| l == r) {
- self.comparison(rhs, |l, r| comparison::eq(l, r))
+ self.comparison(rhs, |l, r| comparison::eq_and_validity(l, r))
} else {
apply_operand_on_chunkedarray_by_iter!(self, rhs, ==)
}
Expand All @@ -491,7 +493,7 @@ impl ChunkCompare<&Utf8Chunked> for Utf8Chunked {
}
// same length
else if self.chunk_id().zip(rhs.chunk_id()).all(|(l, r)| l == r) {
- self.comparison(rhs, |l, r| comparison::neq(l, r))
+ self.comparison(rhs, |l, r| comparison::neq_and_validity(l, r))
} else {
apply_operand_on_chunkedarray_by_iter!(self, rhs, !=)
}
Expand Down Expand Up @@ -616,11 +618,11 @@ where
}

fn equal(&self, rhs: Rhs) -> BooleanChunked {
- self.primitive_compare_scalar(rhs, |l, rhs| comparison::eq_scalar(l, rhs))
+ self.primitive_compare_scalar(rhs, |l, rhs| comparison::eq_scalar_and_validity(l, rhs))
}

fn not_equal(&self, rhs: Rhs) -> BooleanChunked {
- self.primitive_compare_scalar(rhs, |l, rhs| comparison::neq_scalar(l, rhs))
+ self.primitive_compare_scalar(rhs, |l, rhs| comparison::neq_scalar_and_validity(l, rhs))
}

fn gt(&self, rhs: Rhs) -> BooleanChunked {
Expand Down Expand Up @@ -657,10 +659,10 @@ impl ChunkCompare<&str> for Utf8Chunked {
}

fn equal(&self, rhs: &str) -> BooleanChunked {
- self.utf8_compare_scalar(rhs, |l, rhs| comparison::eq_scalar(l, rhs))
+ self.utf8_compare_scalar(rhs, |l, rhs| comparison::eq_scalar_and_validity(l, rhs))
}
fn not_equal(&self, rhs: &str) -> BooleanChunked {
- self.utf8_compare_scalar(rhs, |l, rhs| comparison::neq_scalar(l, rhs))
+ self.utf8_compare_scalar(rhs, |l, rhs| comparison::neq_scalar_and_validity(l, rhs))
}

fn gt(&self, rhs: &str) -> BooleanChunked {
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-io/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ private = []
ahash = "0.7"
anyhow = "1.0"
# arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "dc516495a5a49ae41d3c68caab4e33dd57337640", default-features = false }
- arrow = { package = "arrow2", git = "https://github.com/ritchie46/arrow2", branch = "fix_extend", default-features = false }
+ arrow = { package = "arrow2", git = "https://github.com/ritchie46/arrow2", branch = "no_null_propagate_comp", default-features = false }
# arrow = { package = "arrow2", version = "0.9", default-features = false }
csv-core = { version = "0.1.10", optional = true }
dirs = "4.0"
Expand Down
2 changes: 1 addition & 1 deletion py-polars/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 2d2db6e

Please sign in to comment.